1 /**
2  * libxml2_uri.c: set of generic URI related routines
3  *
4  * Reference: RFC 2396
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
 * Portion Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
10  */
11 
12 #define IN_LIBXML
13 #include "xmlenglibxml.h"
14 
15 #include <string.h>
16 
17 #include <libxml2_uri.h>
18 #include <libxml2_globals.h>
19 
20 /************************************************************************
21  *                                                                      *
22  *      Macros to differentiate various character type                  *
23  *          directly extracted from RFC 2396                            *
24  *                                                                      *
25  ************************************************************************/
26 
/*
 * Character-class macros for the basic RFC 2396 productions.
 * Each one evaluates to 1 when the character belongs to the class and to 0
 * otherwise.  The argument is evaluated more than once, so it must not have
 * side effects.
 */

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) \
    (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | ... | "y" | "z"
 */
#define IS_LOWALPHA(x) \
    (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha  = "A" | "B" | "C" | ... | "Y" | "Z"
 */
#define IS_UPALPHA(x) \
    (((x) >= 'A') && ((x) <= 'Z'))

/*
 * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) \
    (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) \
    (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex      = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                    "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x)                               \
    ((IS_DIGIT(x)) ||                           \
     (((x) >= 'a') && ((x) <= 'f')) ||          \
     (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark     = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                      \
    (((x) == '-')  || ((x) == '_') || ((x) == '.') ||   \
     ((x) == '!')  || ((x) == '~') || ((x) == '*') ||   \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))
75 
76 
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 * Evaluates to 1 for a reserved character, 0 otherwise.  The argument is
 * evaluated several times.
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ','))

/*
 * Function form of IS_RESERVED, safe to call with an expression that has
 * side effects.  Returns 1 if ch is a reserved character, 0 otherwise.
 */
static int
isReserved(char ch)
{
    const int reserved = IS_RESERVED(ch);

    return reserved;
}
89 
/*
 * unreserved = alphanum | mark
 *
 * Evaluates to 1 for an unreserved character, 0 otherwise.  The argument is
 * evaluated several times.
 */
#define IS_UNRESERVED(x) \
    (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Function form of the "unreserved" test.
 * Returns 1 if ch is alphanumeric or a mark character, 0 otherwise.
 */
static int
isUnreserved(char ch)
{
    if (IS_ALPHANUM(ch))
        return 1;
    return IS_MARK(ch) ? 1 : 0;
}
/*
 * escaped = "%" hex hex
 *
 * p must point into a NUL-terminated string: the second and third bytes are
 * only inspected when the preceding ones matched, so the terminator is never
 * skipped over.
 */
#define IS_ESCAPED(p)                                   \
    ((*(p) == '%') &&                                   \
     (IS_HEX((p)[1])) &&                                \
     (IS_HEX((p)[2])))

/*
 * Function form of IS_ESCAPED.
 * Returns 1 if pch points at a "%XX" escape sequence, 0 otherwise.
 */
static int
isEscaped(const char *pch)
{
    if (*pch != '%')
        return 0;
    if (!IS_HEX(pch[1]))
        return 0;
    return IS_HEX(pch[2]) ? 1 : 0;
}
/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                        "&" | "=" | "+" | "$" | ","
 *
 * In other words: unreserved, escaped, or any reserved character other
 * than "/".
 *
 * Returns 1 if pch points at a uric_no_slash element, 0 otherwise.
 */
static int
isUriCNoSlash(const char *pch)
{
    const char c = *pch;

    if (isUnreserved(c) || isEscaped(pch))
        return 1;

    /* every reserved character qualifies except the path separator */
    return (c != '/') && isReserved(c);
}
134 
135 
/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 * Returns 1 if pch points at a pchar element, 0 otherwise.
 */
static int
isPChar(const char *pch)
{
    if (isUnreserved(*pch) || isEscaped(pch))
        return 1;

    switch (*pch) {
    case ':': case '@': case '&':
    case '=': case '+': case '$':
    case ',':
        return 1;
    default:
        return 0;
    }
}
155 
156 
/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 *
 * Tests a single element of a rel_segment.
 * Returns 1 if pch points at such an element, 0 otherwise.
 */
static int
isSegment(const char *pch)
{
    if (isUnreserved(*pch) || isEscaped(pch))
        return 1;

    switch (*pch) {
    case ';': case '@': case '&':
    case '=': case '+': case '$':
    case ',':
        return 1;
    default:
        return 0;
    }
}
/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * IS_SCHEME tests one character against the repeated part of the
 * production (alpha, digit, "+", "-" or ".").  It does not enforce that the
 * first character of a scheme is an alpha.  The argument is evaluated
 * several times.
 *
 * There is no function form of this test: according to the original note
 * the macro is used only once.
 */
#define IS_SCHEME(x)                                            \
    ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||                          \
     ((x) == '+') || ((x) == '-') || ((x) == '.'))
190 
/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * Tests a single element of a reg_name.
 * Returns 1 if pch points at such an element, 0 otherwise.
 */
static int
isRegName(const char *pch)
{
    if (isUnreserved(*pch) || isEscaped(pch))
        return 1;

    switch (*pch) {
    case '$': case ',': case ';':
    case ':': case '@': case '&':
    case '=': case '+':
        return 1;
    default:
        return 0;
    }
}
/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 *
 * Tests a single element of a userinfo.
 * Returns 1 if pch points at such an element, 0 otherwise.
 */
static int
isUserInfo(const char *pch)
{
    if (isUnreserved(*pch) || isEscaped(pch))
        return 1;

    switch (*pch) {
    case ';': case ':': case '&':
    case '=': case '+': case '$':
    case ',':
        return 1;
    default:
        return 0;
    }
}
234 
/*
 * uric = reserved | unreserved | escaped
 *
 * Returns 1 if pch points at a uric element, 0 otherwise.
 */
static int
isUriC(const char *pch)
{
    if (isUnreserved(*pch))
        return 1;
    if (isEscaped(pch))
        return 1;
    return isReserved(*pch) != 0;
}
246 
/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Returns 1 if ch is one of the "unwise" characters, 0 otherwise.
 */
static int
isUnwise(char ch)
{
    switch (ch) {
    case '{':  case '}': case '|':
    case '\\': case '^': case '[':
    case ']':  case '`':
        return 1;
    default:
        return 0;
    }
}
/*
 * Skip to next pointer char, handle escaped sequences:
 * advances p by 3 when it points at '%', by 1 otherwise.
 *
 * p must be a modifiable pointer lvalue; it is evaluated more than once.
 * Every use of the argument is parenthesized so that an expression such as
 * NEXT(*pp) advances *pp itself rather than pp.
 *
 * NOTE: the macro does not check that two characters follow the '%';
 * callers are expected to have validated the escape beforehand.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
270 
271 /*
272  * Productions from the spec.
273  *
274  *    authority     = server | reg_name
275  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
276  *                        ";" | ":" | "@" | "&" | "=" | "+" )
277  *
278  * path          = [ abs_path | opaque_part ]
279  */
280 
281 /************************************************************************
282  *                                                                      *
283  *          Generic URI structure functions                             *
284  *                                                                      *
285  ************************************************************************/
286 
287 /**
288  * xmlCreateURI:
289  *
290  * Simply creates an empty xmlURI
291  *
292  * Returns the new structure or NULL in case of error
293  *
294  * OOM: possible --> returns NULL , sets OOM flag
295  */
296 XMLPUBFUNEXPORT xmlURIPtr
xmlCreateURI(void)297 xmlCreateURI(void) {
298     xmlURIPtr ret;
299 
300     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
301     if (ret == NULL) {
302         xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlCreateURI: out of memory\n"));
303         return(NULL);
304     }
305     memset(ret, 0, sizeof(xmlURI));
306     return(ret);
307 }
308 
309 /**
310  * xmlSaveUri:
311  * @param uri pointer to an xmlURI
312  *
313  * Save the URI as an escaped string
314  *
315  * Returns a new string (to be deallocated by caller)
316  *
317  * OOM:
318  */
319 XMLPUBFUNEXPORT xmlChar*
xmlSaveUri(xmlURIPtr uri)320 xmlSaveUri(xmlURIPtr uri)
321 {
322     xmlChar* ret;// = NULL;
323     const char* p;
324     int len;
325     int max;
326     char ch;
327 
328     if (!uri)
329         return(NULL);
330 
331     max = 80;
332     ret = (xmlChar*) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
333     if (!ret)
334         goto OOM;
335 
336     len = 0;
337 
338     if (uri->scheme != NULL) {
339         p = uri->scheme;
340 
341         while (*p != 0) {
342             if (len >= max) {
343                 xmlChar* tmp;
344                 max *= 2;
345                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
346                 if (!tmp)
347                     goto OOM;
348                 ret = tmp;
349             }
350             ret[len++] = *p++;
351         }
352         if (len >= max) {
353             xmlChar* tmp;
354             max *= 2;
355             tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
356             if (!tmp)
357                 goto OOM;
358             ret = tmp;
359         }
360         ret[len++] = ':';
361     }
362     if (uri->opaque != NULL) {
363         p = uri->opaque;
364         while (*p != 0) {
365             if (len + 3 >= max) {
366                 xmlChar* tmp;
367                 max *= 2;
368                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
369                 if (!tmp)
370                     goto OOM;
371                 ret = tmp;
372             }
373             if (isReserved(*p) || isUnreserved(*p))
374             {
375                 ret[len++] = *p++;
376             }
377             else
378             {
379                 int val = *(unsigned char*)p++;
380                 int hi = val / 0x10;
381                 int lo = val % 0x10;
382                 ret[len++] = '%';
383                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
384                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
385             }
386         }
387     }
388     else
389     {
390         if (uri->server != NULL) {
391             if (len + 3 >= max) {
392                 xmlChar* tmp;
393                 max *= 2;
394                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
395                 if (!tmp)
396                     goto OOM;
397                 ret = tmp;
398             }
399             ret[len++] = '/';
400             ret[len++] = '/';
401             if (uri->user != NULL) {
402                 p = uri->user;
403 
404                 while (*p != 0) {
405                     if (len + 3 >= max) {
406                         xmlChar* tmp;
407                         max *= 2;
408                         tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
409                         if (!tmp)
410                             goto OOM;
411                         ret = tmp;
412                     }
413                     ch = *p;
414                     if (isUnreserved(ch) ||
415                         ch == ';' || ch == ':' ||
416                         ch == '&' || ch == '=' ||
417                         ch == '+' || ch == '$' ||
418                         ch == ',')
419                     {
420                         ret[len++] = *p++;
421                     }
422                     else
423                     {
424                         int val = *(unsigned char *)p++;
425                         int hi = val / 0x10, lo = val % 0x10;
426                         ret[len++] = '%';
427                         ret[len++] = hi + (hi > 9? 'A'-10 : '0');
428                         ret[len++] = lo + (lo > 9? 'A'-10 : '0');
429                     }
430                 }
431                 if (len + 3 >= max) {
432                     xmlChar* tmp;
433                     max *= 2;
434                     tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
435                     if (!tmp)
436                         goto OOM;
437                     ret = tmp;
438                 }
439                 ret[len++] = '@';
440             }
441             p = uri->server;
442             while (*p != 0) {
443                 if (len >= max) {
444                     xmlChar* tmp;
445                     max *= 2;
446                     tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
447                     if (!tmp)
448                         goto OOM;
449                     ret = tmp;
450                 }
451                 ret[len++] = *p++;
452             }
453             if (uri->port > 0) {
454                 if (len + 10 >= max) {
455                     xmlChar* tmp;
456                     max *= 2;
457                     tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
458                     if (!tmp)
459                         goto OOM;
460                     ret = tmp;
461                 }
462                 len += snprintf((char*) &ret[len], max - len, ":%d", uri->port);
463             }
464         }
465         else if (uri->authority != NULL) {
466             if (len + 3 >= max) {
467                 xmlChar* tmp;
468                 max *= 2;
469                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
470                 if (!tmp)
471                     goto OOM;
472                 ret = tmp;
473             }
474             ret[len++] = '/';
475             ret[len++] = '/';
476             p = uri->authority;
477             while (*p != 0) {
478                 if (len + 3 >= max) {
479                     xmlChar* tmp;
480                     max *= 2;
481                     tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
482                     if (!tmp)
483                         goto OOM;
484                     ret = tmp;
485                 }
486                 ch = *p;
487                 if (isUnreserved(ch) ||
488                             ch == '$' || ch == ',' || ch == ';' ||
489                             ch == ':' || ch == '@' || ch == '&' ||
490                             ch == '=' || ch == '+')
491                 {
492                     ret[len++] = *p++;
493                 }
494                 else
495                 {
496                     int val = *(unsigned char *)p++;
497                     int hi = val / 0x10;
498                     int lo = val % 0x10;
499                     ret[len++] = '%';
500                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
501                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
502                 }
503             }
504     }
505     else if (uri->scheme != NULL) {
506         if (len + 3 >= max) {
507             xmlChar* tmp;
508             max *= 2;
509             tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
510             if (!tmp)
511                 goto OOM;
512             ret = tmp;
513         }
514         ret[len++] = '/';
515         ret[len++] = '/';
516     }
517     if (uri->path != NULL) {
518         p = uri->path;
519         while (*p != 0) {
520             if (len + 3 >= max) {
521                 xmlChar* tmp;
522                 max *= 2;
523                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
524                 if (!tmp)
525                     goto OOM;
526                 ret = tmp;
527             }
528             ch = *p;
529             if (isUnreserved(ch) || ch == '/' ||
530                 ch == ';' || ch == '@' || ch == '&' ||
531                 ch == '=' || ch == '+' || ch == '$' ||
532                 ch == ',')
533             {
534                 ret[len++] = *p++;
535             }
536             else
537             {
538                 int val = *(unsigned char *)p++;
539                 int hi = val / 0x10;
540                 int lo = val % 0x10;
541                 ret[len++] = '%';
542                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
543                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
544             }
545         }
546     }
547     if (uri->query != NULL) {
548 
549         if (len + 3 >= max) {
550             xmlChar* tmp;
551             max *= 2;
552             tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
553             if (!tmp)
554                 goto OOM;
555             ret = tmp;
556         }
557         ret[len++] = '?';
558         p = uri->query;
559         while (*p != 0) {
560             if (len + 3 >= max) {
561                 xmlChar* tmp;
562                 max *= 2;
563                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
564                 if (!tmp)
565                     goto OOM;
566                 ret = tmp;
567             }
568             if (isUnreserved(*p) || isReserved(*p))
569             {
570                 ret[len++] = *p++;
571             }
572             else
573             {
574                 int val = *(unsigned char *)p++;
575                 int hi = val / 0x10;
576                 int lo = val % 0x10;
577                 ret[len++] = '%';
578                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
579                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
580             }
581         }
582     }
583     }
584     if (uri->fragment != NULL) {
585         if (len + 3 >= max) {
586             xmlChar* tmp;
587             max *= 2;
588             tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
589             if (!tmp)
590                 goto OOM;
591             ret = tmp;
592         }
593         ret[len++] = '#';
594         p = uri->fragment;
595 
596         while (*p != 0) {
597             if (len + 3 >= max) {
598                 xmlChar* tmp;
599                 max *= 2;
600                 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
601                 if (!tmp)
602                     goto OOM;
603                 ret = tmp;
604             }
605             if (isUnreserved(*p) || isReserved(*p))
606             {
607                 ret[len++] = *p++;
608             }
609             else
610             {
611                 int val = *(unsigned char *)p++;
612                 int hi = val / 0x10;
613                 int lo = val % 0x10;
614                 ret[len++] = '%';
615                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
616                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
617             }
618         }
619     }
620     if (len >= max) {
621         xmlChar* tmp;
622         max *= 2;
623         tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
624         if (!tmp)
625             goto OOM;
626         ret = tmp;
627     }
628     ret[len++] = 0;
629     return(ret);
630 //------------------------------
631 OOM:
632     if(ret)
633         xmlFree(ret);
634     xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlSaveUri: out of memory\n"));
635     return(NULL);
636 }
637 
638 #ifndef XMLENGINE_EXCLUDE_FILE_FUNC
639 /**
640  * xmlPrintURI:
641  * @param stream a FILE* for the output
642  * @param uri pointer to an xmlURI
643  *
644  * Prints the URI in the stream stream.
645  */
646 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)647 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
648     xmlChar *out;
649 
650     out = xmlSaveUri(uri);
651     if (out != NULL) {
652         fprintf(stream, "%s", (char *) out);
653         xmlFree(out);
654     }
655 }
656 #endif
657 
658 /**
659  * xmlCleanURI:
660  * @param uri pointer to an xmlURI
661  *
662  * Make sure the xmlURI struct is free of content
663  *
664  * OOM: never
665  */
666 static void
xmlCleanURI(xmlURIPtr uri)667 xmlCleanURI(xmlURIPtr uri) {
668     if (uri == NULL) return;
669 
670     if (uri->scheme){
671         xmlFree(uri->scheme);
672         uri->scheme = NULL;
673     }
674     if (uri->server){
675         xmlFree(uri->server);
676         uri->server = NULL;
677     }
678     if (uri->user){
679         xmlFree(uri->user);
680         uri->user = NULL;
681     }
682     if (uri->path) {
683         xmlFree(uri->path);
684         uri->path = NULL;
685     }
686     if (uri->fragment) {
687         xmlFree(uri->fragment);
688         uri->fragment = NULL;
689     }
690     if (uri->opaque) {
691         xmlFree(uri->opaque);
692         uri->opaque = NULL;
693     }
694     if (uri->authority) {
695         xmlFree(uri->authority);
696         uri->authority = NULL;
697     }
698     if (uri->query) {
699         xmlFree(uri->query);
700         uri->query = NULL;
701     }
702 }
703 
704 /**
705  * xmlFreeURI:
706  * @param uri pointer to an xmlURI
707  *
708  * Free up the xmlURI struct
709  */
710 XMLPUBFUNEXPORT void
xmlFreeURI(xmlURIPtr uri)711 xmlFreeURI(xmlURIPtr uri) {
712     if (uri == NULL) return;
713 
714     if (uri->scheme) xmlFree(uri->scheme);
715     if (uri->server) xmlFree(uri->server);
716     if (uri->user  ) xmlFree(uri->user);
717     if (uri->path  ) xmlFree(uri->path);
718     if (uri->fragment) xmlFree(uri->fragment);
719     if (uri->opaque) xmlFree(uri->opaque);
720     if (uri->authority) xmlFree(uri->authority);
721     if (uri->query) xmlFree(uri->query);
722     xmlFree(uri);
723 }
724 
725 /************************************************************************
726  *                                                                      *
727  *          Helper functions                                            *
728  *                                                                      *
729  ************************************************************************/
730 
731 /**
732  * xmlNormalizeURIPath:
733  * @param path pointer to the path string
734  *
735  * Applies the 5 normalization steps to a path string--that is, RFC 2396
736  * Section 5.2, steps 6.c through 6.g.
737  *
738  * Normalization occurs directly on the string, no new allocation is done
739  *
740  * Returns 0 or an error code
741  */
742 XMLPUBFUNEXPORT int
xmlNormalizeURIPath(char * path)743 xmlNormalizeURIPath(char *path) {
744     char *cur, *out;
745 
746     if (path == NULL)
747         return(-1);
748 
749     /* Skip all initial "/" chars.  We want to get to the beginning of the
750      * first non-empty segment.
751      */
752     cur = path;
753     while (cur[0] == '/')
754       ++cur;
755     if (cur[0] == '\0')
756       return(0);
757 
758     /* Keep everything we've seen so far.  */
759     out = cur;
760 
761     /*
762      * Analyze each segment in sequence for cases (c) and (d).
763      */
764     while (cur[0] != '\0') {
765     /*
766      * c) All occurrences of "./", where "." is a complete path segment,
767      *    are removed from the buffer string.
768      */
769     if ((cur[0] == '.') && (cur[1] == '/')) {
770         cur += 2;
771         /* '//' normalization should be done at this point too */
772         while (cur[0] == '/')
773         cur++;
774         continue;
775     }
776 
777     /*
778      * d) If the buffer string ends with "." as a complete path segment,
779      *    that "." is removed.
780      */
781     if ((cur[0] == '.') && (cur[1] == '\0'))
782         break;
783 
784     /* Otherwise keep the segment.  */
785     while (cur[0] != '/') {
786             if (cur[0] == '\0')
787               goto done_cd;
788         (out++)[0] = (cur++)[0];
789     }
790     /* nomalize // */
791     while ((cur[0] == '/') && (cur[1] == '/'))
792         cur++;
793 
794         (out++)[0] = (cur++)[0];
795     }
796  done_cd:
797     out[0] = '\0';
798 
799     /* Reset to the beginning of the first segment for the next sequence.  */
800     cur = path;
801     while (cur[0] == '/')
802       ++cur;
803     if (cur[0] == '\0')
804         return(0);
805 
806     /*
807      * Analyze each segment in sequence for cases (e) and (f).
808      *
809      * e) All occurrences of "<segment>/../", where <segment> is a
810      *    complete path segment not equal to "..", are removed from the
811      *    buffer string.  Removal of these path segments is performed
812      *    iteratively, removing the leftmost matching pattern on each
813      *    iteration, until no matching pattern remains.
814      *
815      * f) If the buffer string ends with "<segment>/..", where <segment>
816      *    is a complete path segment not equal to "..", that
817      *    "<segment>/.." is removed.
818      *
819      * To satisfy the "iterative" clause in (e), we need to collapse the
820      * string every time we find something that needs to be removed.  Thus,
821      * we don't need to keep two pointers into the string: we only need a
822      * "current position" pointer.
823      */
824     while (1) {
825         char *segp, *tmp;
826 
827         /* At the beginning of each iteration of this loop, "cur" points to
828          * the first character of the segment we want to examine.
829          */
830 
831         /* Find the end of the current segment.  */
832         segp = cur;
833         while ((segp[0] != '/') && (segp[0] != '\0'))
834           ++segp;
835 
836         /* If this is the last segment, we're done (we need at least two
837          * segments to meet the criteria for the (e) and (f) cases).
838          */
839         if (segp[0] == '\0')
840           break;
841 
842         /* If the first segment is "..", or if the next segment _isn't_ "..",
843          * keep this segment and try the next one.
844          */
845         ++segp;
846         if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
847             || ((segp[0] != '.') || (segp[1] != '.')
848             || ((segp[2] != '/') && (segp[2] != '\0')))) {
849           cur = segp;
850           continue;
851         }
852 
853         /* If we get here, remove this segment and the next one and back up
854          * to the previous segment (if there is one), to implement the
855          * "iteratively" clause.  It's pretty much impossible to back up
856          * while maintaining two pointers into the buffer, so just compact
857          * the whole buffer now.
858          */
859 
860         /* If this is the end of the buffer, we're done.  */
861         if (segp[2] == '\0') {
862           cur[0] = '\0';
863           break;
864         }
865         /* Valgrind complained, strcpy(cur, segp + 3); */
866     /* string will overlap, do not use strcpy */
867     tmp = cur;
868     segp += 3;
869     while ((*tmp++ = *segp++) != 0) {} // NOTE: no loop body here
870 
871         /* If there are no previous segments, then keep going from here.  */
872         segp = cur;
873         while ((segp > path) && ((--segp)[0] == '/'))
874           ;
875         if (segp == path)
876           continue;
877 
878         /* "segp" is pointing to the end of a previous segment; find it's
879          * start.  We need to back up to the previous segment and start
880          * over with that to handle things like "foo/bar/../..".  If we
881          * don't do this, then on the first pass we'll remove the "bar/..",
882          * but be pointing at the second ".." so we won't realize we can also
883          * remove the "foo/..".
884          */
885         cur = segp;
886         while ((cur > path) && (cur[-1] != '/'))
887           --cur;
888     }
889     out[0] = '\0';
890 
891     /*
892      * g) If the resulting buffer string still begins with one or more
893      *    complete path segments of "..", then the reference is
894      *    considered to be in error. Implementations may handle this
895      *    error by retaining these components in the resolved path (i.e.,
896      *    treating them as part of the final URI), by removing them from
897      *    the resolved path (i.e., discarding relative levels above the
898      *    root), or by avoiding traversal of the reference.
899      *
900      * We discard them from the final path.
901      */
902     if (path[0] == '/') {
903       cur = path;
904       while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
905              && ((cur[3] == '/') || (cur[3] == '\0')))
906     cur += 3;
907 
908       if (cur != path) {
909     out = path;
910     while (cur[0] != '\0')
911           (out++)[0] = (cur++)[0];
912     out[0] = 0;
913       }
914     }
915 
916     return(0);
917 }
918 
/*
 * Returns 1 if c is a hexadecimal digit ("0"-"9", "a"-"f", "A"-"F"),
 * 0 otherwise.
 */
static int is_hex(char c) {
    const int isDigit = (c >= '0') && (c <= '9');
    const int isLower = (c >= 'a') && (c <= 'f');
    const int isUpper = (c >= 'A') && (c <= 'F');

    return((isDigit || isLower || isUpper) ? 1 : 0);
}
926 
927 /**
928  * xmlURIUnescapeString:
929  * @param str the string to unescape
930  * @param len the length in bytes to unescape (or <= 0 to indicate full string)
931  * @param target optional destination buffer
932  *
933  * Unescaping routine, does not do validity checks !
934  * Output is direct unsigned char translation of %XX values (no encoding)
935  *
936  * Returns an copy of the string, but unescaped
937  *
938  * OOM: possible --> sets OOM when returns NULL for target==NULL
939  */
940 XMLPUBFUNEXPORT char*
xmlURIUnescapeString(const char * str,int len,char * target)941 xmlURIUnescapeString(const char* str, int len, char* target) {
942     char* ret;
943     char* out;
944     const char *in;
945 
946     if (!str)
947         return(NULL);
948     if (len <= 0)
949         len = strlen(str);
950     if (len < 0)
951         return(NULL);
952 
953     if (!target) {
954         ret = (char*) xmlMallocAtomic(len + 1); // may set OOM
955         if (!ret) {
956             xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIUnescapeString: out of memory\n"));
957             return(NULL);
958         }
959     } else
960         ret = target;
961 
962     in = str;
963     out = ret;
964     while(len > 0) {
965         if ((*in == '%') && (is_hex(in[1])) && (is_hex(in[2])))
966         {
967             in++;
968             if ((*in >= '0') && (*in <= '9'))
969                 *out = (*in - '0');
970             else if ((*in >= 'a') && (*in <= 'f'))
971                 *out = (*in - 'a') + 10;
972             else if ((*in >= 'A') && (*in <= 'F'))
973                 *out = (*in - 'A') + 10;
974             in++;
975             if ((*in >= '0') && (*in <= '9'))
976                 *out = *out * 16 + (*in - '0');
977             else if ((*in >= 'a') && (*in <= 'f'))
978                 *out = *out * 16 + (*in - 'a') + 10;
979             else if ((*in >= 'A') && (*in <= 'F'))
980                 *out = *out * 16 + (*in - 'A') + 10;
981             in++;
982             len -= 3;
983             out++;
984         } else {
985             *out++ = *in++;
986             len--;
987         }
988     }
989     *out = 0;
990     return(ret);
991 }
992 
993 /**
994  * xmlURIEscapeStr:
995  * @param str string to escape
996  * @param list exception list string of chars not to escape
997  *
998  * This routine escapes a string to hex, ignoring reserved characters (a-z)
999  * and the characters in the exception list.
1000  *
1001  * Returns a new escaped string or NULL in case of error.
1002  */
1003 XMLPUBFUNEXPORT xmlChar*
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1004 xmlURIEscapeStr(const xmlChar* str, const xmlChar* list)
1005 {
1006     xmlChar* ret;
1007     xmlChar  ch;
1008     const xmlChar *in;
1009 
1010     unsigned int len, out;
1011 
1012     if (!str)
1013         return(NULL);
1014     len = xmlStrlen(str);
1015     if (!(len > 0)) return(NULL);
1016 
1017     len += 20;
1018     ret = (xmlChar*) xmlMallocAtomic(len);
1019     if (!ret)
1020         goto OOM;
1021 
1022     in = (const xmlChar *) str;
1023     out = 0;
1024     while(*in != 0) {
1025         if (len - out <= 3) {
1026             xmlChar* tmp;
1027             len += 20;
1028             tmp = (xmlChar*) xmlRealloc(ret, len); // DONE: Fix xmlRealloc
1029             if (!tmp)
1030                 goto OOM;
1031             ret = tmp;
1032         }
1033 
1034         ch = *in;
1035 
1036         if ((ch != '@') && (!isUnreserved(ch)) && (!xmlStrchr(list, ch))) {
1037             unsigned char val;
1038             ret[out++] = '%';
1039             val = ch >> 4;
1040             if (val <= 9)
1041                 ret[out++] = '0' + val;
1042             else
1043                 ret[out++] = 'A' + val - 0xA;
1044             val = ch & 0xF;
1045             if (val <= 9)
1046                 ret[out++] = '0' + val;
1047             else
1048                 ret[out++] = 'A' + val - 0xA;
1049             in++;
1050         } else {
1051             ret[out++] = *in++;
1052         }
1053     } // while(*in != 0)
1054     ret[out] = 0;
1055     return(ret);
1056 //---------------------
1057 OOM:
1058     if(ret)
1059         xmlFree(ret);
1060     xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIEscapeStr: out of memory\n"));
1061     return(NULL);
1062 }
1063 
1064 /**
1065  * xmlURIEscape:
1066  * @param str the string of the URI to escape
1067  *
1068  * Escaping routine, does not do validity checks !
1069  * It will try to escape the chars needing this, but this is heuristic
1070  * based it's impossible to be sure.
1071  *
1072  * Returns an copy of the string, but escaped
1073  *
1074  * 25 May 2001
1075  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1076  * according to RFC2396.
1077  *   - Carl Douglas
1078  */
1079 XMLPUBFUNEXPORT xmlChar*
xmlURIEscape(const xmlChar * str)1080 xmlURIEscape(const xmlChar * str)
1081 {
1082     xmlChar* ret;
1083     xmlChar* segment;
1084     xmlURIPtr uri;
1085     int ret2;
1086 
1087 #define NULLCHK(p) if(!p) goto OOM
1088 
1089 
1090     if (str == NULL)
1091         return (NULL);
1092 
1093     uri = xmlCreateURI();
1094     if (uri != NULL) {
1095     /*
1096      * Allow escaping errors in the unescaped form
1097      */
1098         uri->cleanup = 1;
1099         ret2 = xmlParseURIReference(uri, (const char *)str);
1100         if (ret2) {
1101             xmlFreeURI(uri);
1102             return (NULL);
1103         }
1104     }
1105 
1106     if (!uri)
1107         return NULL;
1108 
1109     ret = NULL;
1110 
1111     if (uri->scheme) {
1112         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1113         NULLCHK(segment);
1114         ret = xmlStrcat(ret, segment);
1115         ret = xmlStrcat(ret, BAD_CAST ":");
1116         xmlFree(segment);
1117     }
1118 
1119     if (uri->authority) {
1120         segment = xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1121         NULLCHK(segment);
1122         ret = xmlStrcat(ret, BAD_CAST "//");
1123         ret = xmlStrcat(ret, segment);
1124         xmlFree(segment);
1125     }
1126 
1127     if (uri->user) {
1128         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1129         NULLCHK(segment);
1130         ret = xmlStrcat(ret,BAD_CAST "//");
1131         ret = xmlStrcat(ret, segment);
1132         ret = xmlStrcat(ret, BAD_CAST "@");
1133         xmlFree(segment);
1134     }
1135 
1136     if (uri->server) {
1137         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1138         NULLCHK(segment);
1139         if (uri->user == NULL)
1140             ret = xmlStrcat(ret, BAD_CAST "//");
1141         ret = xmlStrcat(ret, segment);
1142         xmlFree(segment);
1143     }
1144 
1145     if (uri->port) {
1146         xmlChar port[10];
1147 
1148         snprintf((char *) port, 10, "%d", uri->port);
1149         ret = xmlStrcat(ret, BAD_CAST ":");
1150         ret = xmlStrcat(ret, port);
1151     }
1152 
1153     if (uri->path) {
1154         segment = xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1155         NULLCHK(segment);
1156         ret = xmlStrcat(ret, segment);
1157         xmlFree(segment);
1158     }
1159 
1160     if (uri->query) {
1161         segment = xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1162         NULLCHK(segment);
1163         ret = xmlStrcat(ret, BAD_CAST "?");
1164         ret = xmlStrcat(ret, segment);
1165         xmlFree(segment);
1166     }
1167 
1168     if (uri->opaque) {
1169         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1170         NULLCHK(segment);
1171         ret = xmlStrcat(ret, segment);
1172         xmlFree(segment);
1173     }
1174 
1175     if (uri->fragment) {
1176         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1177         NULLCHK(segment);
1178         ret = xmlStrcat(ret, BAD_CAST "#");
1179         ret = xmlStrcat(ret, segment);
1180         xmlFree(segment);
1181     }
1182 
1183     xmlFreeURI(uri);
1184 #undef NULLCHK
1185 
1186     return (ret);
1187 //--------------------
1188 OOM:
1189     xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIEscape: out of memory\n"));
1190     xmlFreeURI(uri);
1191     if(ret) xmlFree(ret);
1192     return NULL;
1193 }
1194 
1195 /************************************************************************
1196  *                                                                      *
1197  *          Escaped URI parsing                                         *
1198  *                                                                      *
1199  ************************************************************************/
1200 
1201 /**
1202  * xmlParseURIFragment:
1203  * @param uri pointer to an URI structure
1204  * @param str pointer to the string to analyze
1205  *
1206  * Parse an URI fragment string and fills in the appropriate fields
1207  * of the uri structure.
1208  *
1209  * fragment = *uric
1210  *
1211  * Returns 0 or the error code
1212  *
1213  * OOM: possible --> returns XML_ERR_NO_MEMORY and sets OOM flag
1214  */
1215 static int
xmlParseURIFragment(xmlURIPtr uri,const char ** str)1216 xmlParseURIFragment(xmlURIPtr uri, const char **str)
1217 {
1218     const char *cur = *str;
1219 
1220     if (str == NULL)
1221         return (-1);
1222 
1223     while (isUriC(cur) || isUnwise(*cur))
1224         NEXT(cur);
1225 
1226     if (uri != NULL) {
1227         if (uri->fragment != NULL)
1228             xmlFree(uri->fragment);
1229         uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1230         if(!uri->fragment)
1231             return XML_ERR_NO_MEMORY;
1232     }
1233     *str = cur;
1234     return (0);
1235 }
1236 
1237 /**
1238  * xmlParseURIQuery:
1239  * @param uri pointer to an URI structure
1240  * @param str pointer to the string to analyze
1241  *
1242  * Parse the query part of an URI
1243  *
1244  * query = *uric
1245  *
1246  * Returns 0 or the error code
1247  *
1248  * OOM: possible for uri!=NULL --> returns XML_ERR_NO_MEMORY and sets OOM flag
1249  */
1250 static int
xmlParseURIQuery(xmlURIPtr uri,const char ** str)1251 xmlParseURIQuery(xmlURIPtr uri, const char **str)
1252 {
1253     const char *cur = *str;
1254 
1255     if (str == NULL)
1256         return (-1);
1257 
1258     while (isUriC(cur) || (uri && uri->cleanup && isUnwise(*cur)))
1259         NEXT(cur);
1260     if (uri != NULL) {
1261         if (uri->query != NULL)
1262             xmlFree(uri->query);
1263         uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1264         if(!uri->query)
1265             return XML_ERR_NO_MEMORY;
1266     }
1267     *str = cur;
1268     return (0);
1269 }
1270 
1271 /**
1272  * xmlParseURIScheme:
1273  * @param uri pointer to an URI structure
1274  * @param str pointer to the string to analyze
1275  *
1276  * Parse an URI scheme
1277  *
1278  * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1279  *
1280  * Returns 0 or the error code
1281  *
1282  * OOM: possible --> sets OOM flag returns XML_ERR_NO_MEMORY
1283  */
1284 static int
xmlParseURIScheme(xmlURIPtr uri,const char ** str)1285 xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1286     const char *cur;
1287 
1288     if (str == NULL)
1289         return(-1);
1290 
1291     cur = *str;
1292     if (!IS_ALPHA(*cur))
1293         return(1); // was 2 which is XML_ERR_NO_MEMORY
1294     cur++;
1295     while (IS_SCHEME(*cur))
1296         cur++;
1297     if (uri != NULL) {
1298         if (uri->scheme)
1299             xmlFree(uri->scheme);
1300         /* !!! strndup */
1301         uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM, when returns NULL
1302         if(!uri->scheme)
1303             return XML_ERR_NO_MEMORY;
1304     }
1305     *str = cur;
1306     return(0);
1307 }
1308 
1309 /**
1310  * xmlParseURIOpaquePart:
1311  * @param uri pointer to an URI structure
1312  * @param str pointer to the string to analyze
1313  *
1314  * Parse an URI opaque part
1315  *
1316  * opaque_part = uric_no_slash *uric
1317  *
1318  * Returns 0 or the error code
1319  *
1320  * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1321  */
1322 static int
xmlParseURIOpaquePart(xmlURIPtr uri,const char ** str)1323 xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1324 {
1325     const char* cur;
1326 
1327     if (str == NULL)
1328         return (-1);
1329 
1330     cur = *str;
1331     if (!(isUriCNoSlash(cur) ||
1332         ( (uri != NULL) && (uri->cleanup) && (isUnwise(*cur)) )
1333         ))
1334     {
1335         return (3);
1336     }
1337     NEXT(cur);
1338     while (isUriC(cur) || ((uri != NULL) && (uri->cleanup) && (isUnwise(*cur))))
1339     {
1340         NEXT(cur);
1341     }
1342     if (uri != NULL) {
1343         if (uri->opaque != NULL)
1344             xmlFree(uri->opaque);
1345         uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1346         if(!uri->opaque)
1347             return XML_ERR_NO_MEMORY;
1348     }
1349     *str = cur;
1350     return (0);
1351 }
1352 
1353 /**
1354  * xmlParseURIServer:
1355  * @param uri pointer to an URI structure
1356  * @param str pointer to the string to analyze
1357  *
1358  * Parse a server subpart of an URI, it's a finer grain analysis
1359  * of the authority part.
1360  *
1361  * server        = [ [ userinfo "@" ] hostport ]
1362  * userinfo      = *( unreserved | escaped |
1363  *                       ";" | ":" | "&" | "=" | "+" | "$" | "," )
1364  * hostport      = host [ ":" port ]
1365  * host          = hostname | IPv4address
1366  * hostname      = *( domainlabel "." ) toplabel [ "." ]
1367  * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1368  * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1369  * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1370  * port          = *digit
1371  *
1372  * Returns 0 or the error code
1373  *
1374  * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1375  */
1376 static int
xmlParseURIServer(xmlURIPtr uri,const char ** str)1377 xmlParseURIServer(xmlURIPtr uri, const char **str) {
1378     const char* cur;
1379     const char* host;
1380     const char* tmp;
1381     const int IPmax = 4;
1382     int oct;
1383 
1384     if (str == NULL)
1385         return(-1);
1386 
1387     cur = *str;
1388 
1389     /*
1390      * is there an userinfo ?
1391      */
1392     while (isUserInfo(cur))
1393     {
1394         NEXT(cur);
1395     }
1396 
1397     if (*cur == '@')
1398     {
1399         if (uri != NULL) {
1400             if (uri->user != NULL)
1401                 xmlFree(uri->user);
1402             uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1403             if(!uri->user)
1404                 return XML_ERR_NO_MEMORY;
1405         }
1406         cur++;
1407     } else {
1408         if (uri != NULL) {
1409             if (uri->user != NULL)
1410                 xmlFree(uri->user);
1411             uri->user = NULL;
1412         }
1413         cur = *str;
1414     }
1415     /*
1416      * This can be empty in the case where there is no server
1417      */
1418     host = cur;
1419     if (*cur == '/') {
1420         if (uri != NULL) {
1421             if (uri->authority != NULL)
1422                 xmlFree(uri->authority);
1423             uri->authority = NULL;
1424             if (uri->server != NULL)
1425                 xmlFree(uri->server);
1426             uri->server = NULL;
1427             uri->port = 0;
1428         }
1429         return(0);
1430     }
1431     /*
1432      * host part of hostport can derive either an IPV4 address
1433      * or an unresolved name. Check the IP first, it easier to detect
1434      * errors if wrong one
1435      */
1436     for (oct = 0; oct < IPmax; ++oct) {
1437         if (*cur == '.')
1438             return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1439         while(IS_DIGIT(*cur))
1440             cur++;
1441         if (oct == (IPmax-1))
1442             continue;
1443         if (*cur != '.')
1444             break;
1445         cur++;
1446     }
1447     if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) {
1448         /* maybe host_name */
1449         if (!IS_ALPHANUM(*cur))
1450             return(4); /* e.g. http://xml.$oft */
1451         do {
1452             do ++cur; while (IS_ALPHANUM(*cur));
1453             if (*cur == '-') {
1454                 --cur;
1455                 if (*cur == '.')
1456                     return(5); /* e.g. http://xml.-soft */
1457                 ++cur;
1458                 continue;
1459             }
1460             if (*cur == '.') {
1461                 --cur;
1462                 if (*cur == '-')
1463                     return(6); /* e.g. http://xml-.soft */
1464                 if (*cur == '.')
1465                     return(7); /* e.g. http://xml..soft */
1466                 ++cur;
1467                 continue;
1468             }
1469             break;
1470         } while (1);
1471 
1472         tmp = cur;
1473         if (tmp[-1] == '.')
1474             --tmp; /* e.g. http://xml.$Oft/ */
1475         do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1476         if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1477             return(8); /* e.g. http://xmlsOft.0rg/ */
1478     }
1479     if (uri != NULL) {
1480         if (uri->authority != NULL)
1481             xmlFree(uri->authority);
1482         uri->authority = NULL;
1483         if (uri->server != NULL)
1484             xmlFree(uri->server);
1485         uri->server = xmlURIUnescapeString(host, cur - host, NULL); // may set OOM flag
1486         if(!uri->server)
1487             return XML_ERR_NO_MEMORY;
1488     }
1489     /*
1490      * finish by checking for a port presence.
1491      */
1492     if (*cur == ':') {
1493         cur++;
1494         if (IS_DIGIT(*cur)) {
1495             if (uri != NULL)
1496                 uri->port = 0;
1497             while (IS_DIGIT(*cur)) {
1498                 if (uri != NULL)
1499                     uri->port = uri->port * 10 + (*cur - '0');
1500                 cur++;
1501             }
1502         }
1503     }
1504     *str = cur;
1505     return(0);
1506 }
1507 
1508 /**
1509  * xmlParseURIRelSegment:
1510  * @param uri pointer to an URI structure
1511  * @param str pointer to the string to analyze
1512  *
1513  * Parse an URI relative segment
1514  *
1515  * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1516  *                          "+" | "$" | "," )
1517  *
1518  * Returns 0 or the error code
1519  *
1520  * OOM: possible --> sets OOM and returns XML_ERR_NO_MEMORY
1521  */
1522 static int
xmlParseURIRelSegment(xmlURIPtr uri,const char ** str)1523 xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1524 {
1525     const char *cur;
1526 
1527     if (str == NULL)
1528         return (-1);
1529 
1530     cur = *str;
1531     if (!(isSegment(cur) || ((uri) && (uri->cleanup) && (isUnwise(*cur))))) {
1532         return (3);
1533     }
1534     NEXT(cur);
1535     while (isSegment(cur) || ((uri) && (uri->cleanup) && (isUnwise(*cur))))
1536         NEXT(cur);
1537     if (uri != NULL) {
1538         if (uri->path != NULL)
1539             xmlFree(uri->path);
1540         uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1541         if(!uri->path)
1542             return XML_ERR_NO_MEMORY;
1543     }
1544     *str = cur;
1545     return (0);
1546 }
1547 
1548 /**
1549  * xmlParseURIPathSegments:
1550  * @param uri pointer to an URI structure
1551  * @param str pointer to the string to analyze
1552  * @param slash should we add a leading slash
1553  *
1554  * Parse an URI set of path segments
1555  *
1556  * path_segments = segment *( "/" segment )
1557  * segment       = *pchar *( ";" param )
1558  * param         = *pchar
1559  *
1560  * Returns 0 or the error code
1561  *
1562  * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1563  */
1564 static int
xmlParseURIPathSegments(xmlURIPtr uri,const char ** str,int slash)1565 xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1566 {
1567     const char *cur;
1568 
1569     if (str == NULL)
1570         return (-1);
1571 
1572     cur = *str;
1573 // XMLENGINE: BEGIN REPLACE
1574     for(;;) {
1575     // Replace while(1) for infinite loop
1576     // do {
1577 // XMLENGINE: END REPLACE
1578 
1579         while (isPChar(cur) || (uri && uri->cleanup && (isUnwise(*cur))))
1580         {
1581             NEXT(cur);
1582         }
1583         while (*cur == ';') {
1584             cur++;
1585             while (isPChar(cur) || ((uri != NULL) && (uri->cleanup) && (isUnwise(*cur))))
1586                 NEXT(cur);
1587         }
1588         if (*cur != '/')
1589             break;
1590         cur++;
1591 // XMLENGINE: BEGIN REPLACE
1592     }
1593     //} while (1);
1594 // XMLENGINE: END REPLACE
1595     if (uri != NULL) {
1596         int len, len2 = 0;
1597         char *path;
1598 
1599         /*
1600          * Concat the set of path segments to the current path
1601          */
1602         len = cur - *str;
1603         if (slash)
1604             len++;
1605 
1606         if (uri->path != NULL) {
1607             len2 = strlen(uri->path);
1608             len += len2;
1609         }
1610         path = (char *) xmlMallocAtomic(len + 1); // may set OOM flag
1611         if (path == NULL) {
1612             xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlParseURIPathSegments: out of memory\n"));
1613             *str = cur;
1614             //return (-1);
1615             return XML_ERR_NO_MEMORY;
1616         }
1617         if (uri->path != NULL)
1618             memcpy(path, uri->path, len2);
1619         if (slash) {
1620             path[len2] = '/';
1621             len2++;
1622         }
1623         path[len2] = 0;
1624         if (cur - *str > 0)
1625             xmlURIUnescapeString(*str, cur - *str, &path[len2]); // MAY NOT set OOM flag !!!
1626         if (uri->path != NULL)
1627             xmlFree(uri->path);
1628         uri->path = path;
1629     }
1630     *str = cur;
1631     return (0);
1632 }
1633 
1634 /**
1635  * xmlParseURIAuthority:
1636  * @param uri pointer to an URI structure
1637  * @param str pointer to the string to analyze
1638  *
1639  * Parse the authority part of an URI.
1640  *
1641  * authority = server | reg_name
1642  * server    = [ [ userinfo "@" ] hostport ]
1643  * reg_name  = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1644  *                        "@" | "&" | "=" | "+" )
1645  *
1646  * Note : this is completely ambiguous since reg_name is allowed to
1647  *        use the full set of chars in use by server:
1648  *
1649  *        3.2.1. Registry-based Naming Authority
1650  *
1651  *        The structure of a registry-based naming authority is specific
1652  *        to the URI scheme, but constrained to the allowed characters
1653  *        for an authority component.
1654  *
1655  * Returns 0 or the error code
1656  *
1657  * OOM: possible --> OOM flag is set, returns XML_ERR_NO_MEMORY
1658  */
1659 static int
xmlParseURIAuthority(xmlURIPtr uri,const char ** str)1660 xmlParseURIAuthority(xmlURIPtr uri, const char** str) {
1661     const char *cur;
1662     int ret;
1663 
1664     if (str == NULL)
1665         return(-1);
1666 
1667     cur = *str;
1668 
1669     /*
1670      * try first to parse it as a server string.
1671      */
1672     ret = xmlParseURIServer(uri, str); // may set OOM flag -- returns XML_ERR_NO_MEMORY
1673     if ((ret == 0) && (*str != NULL) &&
1674         ((**str == 0) || (**str == '/') || (**str == '?')))
1675         return(0);
1676     if(ret!=0)
1677         return ret; // error happened, maybe OOM
1678     *str = cur;
1679 
1680     /*
1681      * failed, fallback to reg_name
1682      */
1683     if (!isRegName(cur)) {
1684         return(5);
1685     }
1686     NEXT(cur);
1687     while (isRegName(cur))
1688         NEXT(cur);
1689 
1690     if (uri != NULL) {
1691         if (uri->server != NULL)
1692             xmlFree(uri->server);
1693         uri->server = NULL;
1694         if (uri->user != NULL)
1695             xmlFree(uri->user);
1696         uri->user = NULL;
1697         if (uri->authority != NULL)
1698             xmlFree(uri->authority);
1699         uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM
1700         if(!uri->authority)
1701             return XML_ERR_NO_MEMORY;
1702     }
1703     *str = cur;
1704     return(0);
1705 }
1706 
1707 /**
1708  * xmlParseURIHierPart:
1709  * @param uri pointer to an URI structure
1710  * @param str pointer to the string to analyze
1711  *
1712  * Parse an URI hierarchical part
1713  *
1714  * hier_part = ( net_path | abs_path ) [ "?" query ]
1715  * abs_path = "/"  path_segments
1716  * net_path = "//" authority [ abs_path ]
1717  *
1718  * Returns 0 or the error code
1719  *
1720  * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1721  */
1722 static int
xmlParseURIHierPart(xmlURIPtr uri,const char ** str)1723 xmlParseURIHierPart(xmlURIPtr uri, const char** str) {
1724     int ret;
1725     const char* cur;
1726 
1727     if (!str)
1728         return(-1);
1729 
1730     cur = *str;
1731 
1732     if ((cur[0] == '/') && (cur[1] == '/'))
1733     {
1734         cur += 2;
1735         ret = xmlParseURIAuthority(uri, &cur); // may set OOM flag and return XML_ERR_NO_MEMORY
1736         if (ret != 0)
1737             return(ret);
1738 
1739         if (cur[0] == '/') {
1740             cur++;
1741             ret = xmlParseURIPathSegments(uri, &cur, 1);
1742         }
1743     } else if (cur[0] == '/') {
1744         cur++;
1745         ret = xmlParseURIPathSegments(uri, &cur, 1);
1746     } else {
1747         return(4);
1748     }
1749     if (ret != 0)
1750         return(ret); // there was error, maybe OOM
1751 
1752     if (*cur == '?') {
1753         cur++;
1754         ret = xmlParseURIQuery(uri, &cur); // may set OOM flag
1755         if (ret != 0)
1756             return(ret);
1757     }
1758     *str = cur;
1759     return(0);
1760 }
1761 
1762 /**
1763  * xmlParseAbsoluteURI:
1764  * @param uri pointer to an URI structure
1765  * @param str pointer to the string to analyze
1766  *
1767  * Parse an URI reference string and fills in the appropriate fields
1768  * of the uri structure
1769  *
1770  * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1771  *
1772  * Returns 0 or the error code
1773  *
1774  * OOM: possible --> sets OOM and returns XML_ERR_NO_MEMORY
1775  */
1776 static int
xmlParseAbsoluteURI(xmlURIPtr uri,const char ** str)1777 xmlParseAbsoluteURI(xmlURIPtr uri, const char** str) {
1778     int ret;
1779     const char* cur;
1780 
1781     if (str == NULL)
1782         return(-1);
1783 
1784     cur = *str;
1785 
1786     ret = xmlParseURIScheme(uri, str); // may set OOM and return XML_ERR_NO_MEMORY
1787     if (ret != 0) return(ret);
1788 
1789     if (**str != ':') {
1790         *str = cur;
1791         return(1);
1792     }
1793     (*str)++;
1794     if (**str == '/')
1795         return(xmlParseURIHierPart(uri, str)); // may set OOM flag
1796     return(xmlParseURIOpaquePart(uri, str)); //may set OOM flag
1797 }
1798 
1799 /**
1800  * xmlParseRelativeURI:
1801  * @param uri pointer to an URI structure
1802  * @param str pointer to the string to analyze
1803  *
1804  * Parse an relative URI string and fills in the appropriate fields
1805  * of the uri structure
1806  *
1807  * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1808  * abs_path = "/"  path_segments
1809  * net_path = "//" authority [ abs_path ]
1810  * rel_path = rel_segment [ abs_path ]
1811  *
1812  * Returns 0 or the error code
1813  *
1814  * OOM: possible --> sets OOM flag and return XML_ERR_NO_MEMORY
1815  */
1816 static int
xmlParseRelativeURI(xmlURIPtr uri,const char ** str)1817 xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1818     int ret = 0;
1819     const char *cur;
1820 
1821     if (str == NULL)
1822         return(-1);
1823 
1824     cur = *str;
1825     if ((cur[0] == '/') && (cur[1] == '/')) {
1826         cur += 2;
1827 
1828         ret = xmlParseURIAuthority(uri, &cur); // may set OOM flag
1829         if (ret != 0)
1830             return(ret);
1831         if (cur[0] == '/') {
1832             cur++;
1833             ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag
1834         }
1835     } else if (cur[0] == '/') {
1836         cur++;
1837         ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag
1838     } else if (cur[0] != '#' && cur[0] != '?') {
1839         ret = xmlParseURIRelSegment(uri, &cur); // may set OOM flag
1840         if (ret != 0)
1841             return(ret);
1842         if (cur[0] == '/') {
1843             cur++;
1844             ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag
1845         }
1846     }
1847     if (ret != 0)
1848         return(ret);
1849     if (*cur == '?') {
1850         cur++;
1851         ret = xmlParseURIQuery(uri, &cur); // may set OOM flag
1852         if (ret != 0)
1853             return(ret);
1854     }
1855     *str = cur;
1856     return(ret);
1857 }
1858 
1859 /**
1860  * xmlParseURIReference:
1861  * @param uri pointer to an URI structure
1862  * @param str the string to analyze
1863  *
1864  * Parse an URI reference string and fills in the appropriate fields
1865  * of the uri structure
1866  *
1867  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1868  *
1869  * Returns 0 or the error code
1870  *
1871  * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1872  */
1873 XMLPUBFUNEXPORT int
xmlParseURIReference(xmlURIPtr uri,const char * str)1874 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1875     int ret;
1876     const char *tmp = str;
1877 
1878     if (!str)
1879         return(-1);
1880     xmlCleanURI(uri);
1881 
1882     /*
1883      * Try first to parse absolute refs, then fallback to relative if
1884      * it fails.
1885      */
1886     ret = xmlParseAbsoluteURI(uri, &str); // may set OOM flag
1887     if (ret == XML_ERR_NO_MEMORY)
1888         return ret;
1889 
1890     if (ret != 0) { // No, it is not an absolute URI, try it as a relative one...
1891         xmlCleanURI(uri);
1892         str = tmp;
1893         ret = xmlParseRelativeURI(uri, &str); // may set OOM flag
1894         if (ret == XML_ERR_NO_MEMORY)
1895             return ret;
1896     }
1897 
1898     if (ret != 0) {
1899         xmlCleanURI(uri);
1900         return(ret);
1901     }
1902 
1903     if (*str == '#') {
1904         str++;
1905         ret = xmlParseURIFragment(uri, &str); // may set OOM flag
1906         if (ret != 0) return(ret);
1907     }
1908     if (*str != 0) {
1909         xmlCleanURI(uri);
1910         return(1);
1911     }
1912     return(0);
1913 }
1914 
1915 /**
1916  * xmlParseURI:
1917  * @param str the URI string to analyze
1918  *
1919  * Parse an URI
1920  *
1921  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1922  *
1923  * Returns a newly build xmlURIPtr or NULL in case of error
1924  *
1925  * OOM: possible --> returns NULL for uri!=NULL
1926  */
1927 XMLPUBFUNEXPORT xmlURIPtr
xmlParseURI(const char * str)1928 xmlParseURI(const char *str)
1929 {
1930     xmlURIPtr uri;
1931     int ret;
1932 
1933     if (!str)
1934         return(NULL);
1935     uri = xmlCreateURI();
1936     if (uri) {
1937         ret = xmlParseURIReference(uri, str);
1938         if (ret) {
1939             xmlCleanURI(uri);
1940             xmlFreeURI(uri);
1941             return(NULL);
1942         }
1943     }
1944     return(uri);
1945 }
1946 
1947 /************************************************************************
1948  *                                                                      *
1949  *          Public functions                                            *
1950  *                                                                      *
1951  ************************************************************************/
1952 
1953 /**
1954  * xmlBuildURI:
1955  * @param URI the URI instance found in the document
1956  * @param base the base value
1957  *
 * Computes the final URI of the reference done by checking that
1959  * the given URI is valid, and building the final URI using the
1960  * base URI. This is processed according to section 5.2 of the
1961  * RFC 2396
1962  *
1963  * 5.2. Resolving Relative References to Absolute Form
1964  *
1965  * Returns a new URI string (to be freed by the caller) or NULL in case
1966  *         of error.
1967  *
1968  * OOM:
1969  */
1970 XMLPUBFUNEXPORT xmlChar*
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1971 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1972     xmlChar *val = NULL;
1973     int ret, len, indx, cur, out;
1974     xmlURIPtr ref = NULL;
1975     xmlURIPtr bas = NULL;
1976     xmlURIPtr res = NULL;
1977 
1978     /*
1979      * 1) The URI reference is parsed into the potential four components and
1980      *    fragment identifier, as described in Section 4.3.
1981      *
1982      *    NOTE that a completely empty URI is treated by modern browsers
1983      *    as a reference to "." rather than as a synonym for the current
1984      *    URI.  Should we do that here?
1985      */
1986     if (!URI)
1987         ret = -1;
1988     else {
1989         if (*URI) {
1990             ref = xmlCreateURI();
1991             if (ref == NULL)
1992                 goto done;
1993             ret = xmlParseURIReference(ref, (const char*) URI);
1994         }
1995         else
1996             ret = 0;
1997     }
1998     if (ret != 0)
1999         goto done;
2000     if ((ref != NULL) && (ref->scheme != NULL)) {
2001         /*
2002          * The URI is absolute don't modify.
2003          */
2004         val = xmlStrdup(URI);
2005         goto done;
2006     }
2007     if (base == NULL)
2008         ret = -1;
2009     else {
2010         bas = xmlCreateURI();
2011         if (bas == NULL)
2012             goto done;
2013         ret = xmlParseURIReference(bas, (const char *) base);
2014     }
2015     if (ret != 0) {
2016         if (ref)
2017             val = xmlSaveUri(ref);
2018         goto done;
2019     }
2020     if (ref == NULL) {
2021         /*
2022          * the base fragment must be ignored
2023          */
2024         if (bas->fragment != NULL) {
2025             xmlFree(bas->fragment);
2026             bas->fragment = NULL;
2027         }
2028         val = xmlSaveUri(bas);
2029         goto done;
2030     }
2031 
2032     /*
2033      * 2) If the path component is empty and the scheme, authority, and
2034      *    query components are undefined, then it is a reference to the
2035      *    current document and we are done.  Otherwise, the reference URI's
2036      *    query and fragment components are defined as found (or not found)
2037      *    within the URI reference and not inherited from the base URI.
2038      *
2039      *    NOTE that in modern browsers, the parsing differs from the above
2040      *    in the following aspect:  the query component is allowed to be
2041      *    defined while still treating this as a reference to the current
2042      *    document.
2043      */
2044     res = xmlCreateURI();
2045     if (res == NULL)
2046         goto done;
2047     if ((ref->scheme == NULL)   &&
2048         (ref->path == NULL)     &&
2049         (ref->authority == NULL)&&
2050         (ref->server == NULL))
2051     {
2052         if (bas->scheme != NULL)
2053             res->scheme = xmlMemStrdup(bas->scheme);
2054         if (bas->authority != NULL)
2055             res->authority = xmlMemStrdup(bas->authority);
2056         else if (bas->server != NULL) {
2057             res->server = xmlMemStrdup(bas->server);
2058             if (bas->user != NULL)
2059                 res->user = xmlMemStrdup(bas->user);
2060             res->port = bas->port;
2061         }
2062         if (bas->path != NULL)
2063             res->path = xmlMemStrdup(bas->path);
2064         if (ref->query != NULL)
2065             res->query = xmlMemStrdup(ref->query);
2066         else if (bas->query != NULL)
2067             res->query = xmlMemStrdup(bas->query);
2068         if (ref->fragment != NULL)
2069             res->fragment = xmlMemStrdup(ref->fragment);
2070         goto step_7;
2071     }
2072 
2073     /*
2074      * 3) If the scheme component is defined, indicating that the reference
2075      *    starts with a scheme name, then the reference is interpreted as an
2076      *    absolute URI and we are done.  Otherwise, the reference URI's
2077      *    scheme is inherited from the base URI's scheme component.
2078      */
2079     if (ref->scheme != NULL) {
2080         val = xmlSaveUri(ref);
2081         goto done;
2082     }
2083     if (bas->scheme != NULL)
2084         res->scheme = xmlMemStrdup(bas->scheme);
2085 
2086     if (ref->query != NULL)
2087         res->query = xmlMemStrdup(ref->query);
2088     if (ref->fragment != NULL)
2089         res->fragment = xmlMemStrdup(ref->fragment);
2090 
2091     /*
2092      * 4) If the authority component is defined, then the reference is a
2093      *    network-path and we skip to step 7.  Otherwise, the reference
2094      *    URI's authority is inherited from the base URI's authority
2095      *    component, which will also be undefined if the URI scheme does not
2096      *    use an authority component.
2097      */
2098     if ((ref->authority != NULL) || (ref->server != NULL)) {
2099         if (ref->authority != NULL)
2100             res->authority = xmlMemStrdup(ref->authority);
2101         else {
2102             res->server = xmlMemStrdup(ref->server);
2103             if (ref->user != NULL)
2104             res->user = xmlMemStrdup(ref->user);
2105                 res->port = ref->port;
2106         }
2107         if (ref->path != NULL)
2108             res->path = xmlMemStrdup(ref->path);
2109         goto step_7;
2110      }
2111      if (bas->authority != NULL)
2112         res->authority = xmlMemStrdup(bas->authority);
2113      else if (bas->server != NULL) {
2114             res->server = xmlMemStrdup(bas->server);
2115             if (bas->user != NULL)
2116                 res->user = xmlMemStrdup(bas->user);
2117             res->port = bas->port;
2118         }
2119 
2120     /*
2121      * 5) If the path component begins with a slash character ("/"), then
2122      *    the reference is an absolute-path and we skip to step 7.
2123      */
2124     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2125         res->path = xmlMemStrdup(ref->path);
2126         goto step_7;
2127     }
2128 
2129 
2130     /*
2131      * 6) If this step is reached, then we are resolving a relative-path
2132      *    reference.  The relative path needs to be merged with the base
2133      *    URI's path.  Although there are many ways to do this, we will
2134      *    describe a simple method using a separate string buffer.
2135      *
2136      * Allocate a buffer large enough for the result string.
2137      */
2138     len = 2; /* extra / and 0 */
2139     if (ref->path != NULL)
2140         len += strlen(ref->path);
2141     if (bas->path != NULL)
2142         len += strlen(bas->path);
2143     res->path = (char *) xmlMallocAtomic(len);
2144     if (res->path == NULL) {
2145         xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlBuildURI: out of memory\n"));
2146         goto done;
2147     }
2148     res->path[0] = 0;
2149 
2150     /*
2151      * a) All but the last segment of the base URI's path component is
2152      *    copied to the buffer.  In other words, any characters after the
2153      *    last (right-most) slash character, if any, are excluded.
2154      */
2155     cur = 0;
2156     out = 0;
2157     if (bas->path != NULL) {
2158         while (bas->path[cur] != 0) {
2159             while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2160                 cur++;
2161             if (bas->path[cur] == 0)
2162                 break;
2163 
2164             cur++;
2165             while (out < cur) {
2166                 res->path[out] = bas->path[out];
2167                 out++;
2168             }
2169         }
2170     }
2171     res->path[out] = 0;
2172 
2173     /*
2174      * b) The reference's path component is appended to the buffer
2175      *    string.
2176      */
2177     if (ref->path != NULL && ref->path[0] != 0) {
2178         indx = 0;
2179         /*
2180          * Ensure the path includes a '/'
2181          */
2182         if ((out == 0) && (bas->server != NULL))
2183             res->path[out++] = '/';
2184         while (ref->path[indx] != 0) {
2185             res->path[out++] = ref->path[indx++];
2186         }
2187     }
2188     res->path[out] = 0;
2189 
2190     /*
2191      * Steps c) to h) are really path normalization steps
2192      */
2193     xmlNormalizeURIPath(res->path);
2194 
2195 step_7:
2196 
2197     /*
2198      * 7) The resulting URI components, including any inherited from the
2199      *    base URI, are recombined to give the absolute form of the URI
2200      *    reference.
2201      */
2202     val = xmlSaveUri(res);
2203 
2204 done:
2205     if (ref)
2206         xmlFreeURI(ref);
2207     if (bas)
2208         xmlFreeURI(bas);
2209     if (res)
2210         xmlFreeURI(res);
2211     return(val);
2212 }
2213 
2214 /**
2215  * xmlCanonicPath:
2216  * @param path the resource locator in a filesystem notation
2217  *
2218  * Constructs a canonic path from the specified path.
2219  *
2220  * Returns a new canonic path, or a duplicate of the path parameter if the
2221  * construction fails. The caller is responsible for freeing the memory occupied
2222  * by the returned string. If there is insufficient memory available, or the
2223  * argument is NULL, the function returns NULL.
2224  *
2225  * OOM:
2226  */
2227 #define IS_WINDOWS_PATH(p)                          \
2228     ((p != NULL) &&                                 \
2229      (((p[0] >= 'a') && (p[0] <= 'z')) ||           \
2230       ((p[0] >= 'A') && (p[0] <= 'Z'))) &&          \
2231      (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2232 //ISSUE: Not finished reviewing for OOM handling / O.K.: 16.05.05
2233 XMLPUBFUNEXPORT xmlChar*
xmlCanonicPath(const xmlChar * path)2234 xmlCanonicPath(const xmlChar *path)
2235 {
2236 #if (defined(_WIN32)||defined(__SYMBIAN32__)) && !defined(__CYGWIN__)
2237     int len = 0;
2238     //int i = 0;
2239     xmlChar *p = NULL;
2240 #endif
2241     xmlChar *ret;
2242     xmlURIPtr uri;
2243 
2244     // DO NOT REMOVE this check
2245     if (path == NULL)
2246         return(NULL);
2247 
2248     if ((uri = xmlParseURI((const char *) path)) != NULL)
2249     {
2250         xmlFreeURI(uri);
2251         return xmlStrdup(path);
2252     }
2253     // It's should be OOM already!!! // ISSUE: Not finished work / OK: 16.05.05
2254     uri = xmlCreateURI();
2255     if (uri == NULL) {
2256         return(NULL);
2257     }
2258 
2259 #if (defined(_WIN32)||defined(__SYMBIAN32__)) && !defined(__CYGWIN__)
2260     len = xmlStrlen(path);
2261     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2262         uri->scheme = (char*) xmlStrdup(BAD_CAST "file");
2263         uri->path = (char*) xmlMallocAtomic(len + 2);
2264         uri->path[0] = '/';
2265         p = (xmlChar*) uri->path + 1;
2266         strncpy((char*)p, (char*)path, len + 1);
2267     } else {
2268         uri->path = (char*) xmlStrdup(path);
2269         p = (xmlChar*) uri->path;
2270     }
2271     while (*p != '\0') {
2272         if (*p == '\\')
2273             *p = '/';
2274         p++;
2275     }
2276 #else
2277     uri->path = (char *) xmlStrdup((const xmlChar *) path);
2278 #endif
2279 
2280     ret = xmlSaveUri(uri);
2281     xmlFreeURI(uri);
2282     return(ret);
2283 }
2284