1 /**
2 * libxml2_uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
 * Portion Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
10 */
11
12 #define IN_LIBXML
13 #include "xmlenglibxml.h"
14
15 #include <string.h>
16
17 #include <libxml2_uri.h>
18 #include <libxml2_globals.h>
19
20 /************************************************************************
21 * *
22 * Macros to differentiate various character type *
23 * directly extracted from RFC 2396 *
24 * *
25 ************************************************************************/
26
/*
 * Single-character classes of RFC 2396. Each macro expands its argument
 * several times, so the argument must be free of side effects.
 */

/* lowalpha = "a" | "b" | ... | "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" | "B" | ... | "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" | "1" | ... | "9" */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A".."F" | "a".."f" */
#define IS_HEX(x) \
    (IS_DIGIT(x) || \
     (((x) >= 'a') && ((x) <= 'f')) || \
     (((x) >= 'A') && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x) \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || \
     ((x) == '!') || ((x) == '~') || ((x) == '*') || \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))
75
76
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') || \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') || \
     ((x) == '$') || ((x) == ','))

/* Function form of IS_RESERVED: 1 if ch is in the reserved set, else 0. */
static int isReserved(char ch)
{
    const int reserved = IS_RESERVED(ch);

    return reserved;
}
89
/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/* Function form of IS_UNRESERVED: 1 for an alphanum or mark, else 0. */
static int isUnreserved(char ch)
{
    if (IS_ALPHANUM(ch))
        return 1;
    return IS_MARK(ch);
}
/*
 * escaped = "%" hex hex
 *
 * The two following characters are only inspected once the preceding
 * one matched, so a NUL-terminated string is never read past its end.
 */
#define IS_ESCAPED(p) \
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/* Function form of IS_ESCAPED: 1 if pch starts a "%XX" escape, else 0. */
static int isEscaped(const char* pch)
{
    if (pch[0] != '%')
        return 0;
    if (!IS_HEX(pch[1]))
        return 0;
    return IS_HEX(pch[2]);
}
/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 *
 * Implemented as "unreserved, escaped, or any reserved character other
 * than '/'". Returns 1 on a match, 0 otherwise.
 */
static int isUriCNoSlash(const char* pch)
{
    const char first = pch[0];

    if (isUnreserved(first) || isEscaped(pch))
        return 1;
    if (first == '/')
        return 0;
    return isReserved(first);
}
134
135
/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 * Returns 1 if pch points at a pchar (possibly a "%XX" escape), else 0.
 */
static int isPChar(const char* pch)
{
    switch (*pch) {
    case ':': case '@': case '&': case '=':
    case '+': case '$': case ',':
        return 1;
    default:
        return isUnreserved(*pch) || isEscaped(pch);
    }
}
155
156
/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 *
 * Tests one character of a rel_segment. Returns 1 on a match, else 0.
 */
static int isSegment(const char* pch)
{
    switch (*pch) {
    case ';': case '@': case '&': case '=':
    case '+': case '$': case ',':
        return 1;
    default:
        return isUnreserved(*pch) || isEscaped(pch);
    }
}
/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * IS_SCHEME tests one character of the part after the leading alpha.
 * It stays a macro rather than a function; the original note says it
 * is used only once.
 */
#define IS_SCHEME(x) \
    (IS_ALPHANUM(x) || ((x) == '+') || ((x) == '-') || ((x) == '.'))
190
/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * Tests one character of a reg_name. Returns 1 on a match, else 0.
 */
static int isRegName(const char* pch)
{
    switch (*pch) {
    case '$': case ',': case ';': case ':':
    case '@': case '&': case '=': case '+':
        return 1;
    default:
        return isUnreserved(*pch) || isEscaped(pch);
    }
}
/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 *
 * Tests one character of a userinfo. Returns 1 on a match, else 0.
 */
static int isUserInfo(const char* pch)
{
    switch (*pch) {
    case ';': case ':': case '&': case '=':
    case '+': case '$': case ',':
        return 1;
    default:
        return isUnreserved(*pch) || isEscaped(pch);
    }
}
234
/*
 * uric = reserved | unreserved | escaped
 *
 * Returns 1 if pch points at a uric (possibly a "%XX" escape), else 0.
 */
static int isUriC(const char* pch)
{
    const char first = pch[0];

    if (isUnreserved(first))
        return 1;
    if (isEscaped(pch))
        return 1;
    return isReserved(first);
}
246
/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Returns 1 if ch is one of the "unwise" characters, else 0.
 */
static int isUnwise(char ch)
{
    switch (ch) {
    case '{': case '}': case '|': case '\\':
    case '^': case '[': case ']': case '`':
        return 1;
    default:
        return 0;
    }
}
/*
 * Advance the pointer lvalue p to the next character: by three bytes
 * when it points at '%' (the start of a "%XX" escape), by one byte
 * otherwise.
 *
 * The macro does not itself verify that two more characters follow the
 * '%'; callers are expected to have checked that (e.g. with isEscaped).
 * The argument is fully parenthesized so that expressions such as
 * NEXT(*pp) advance *pp rather than pp; it is still evaluated more than
 * once and must have no side effects.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
270
271 /*
272 * Productions from the spec.
273 *
274 * authority = server | reg_name
275 * reg_name = 1*( unreserved | escaped | "$" | "," |
276 * ";" | ":" | "@" | "&" | "=" | "+" )
277 *
278 * path = [ abs_path | opaque_part ]
279 */
280
281 /************************************************************************
282 * *
283 * Generic URI structure functions *
284 * *
285 ************************************************************************/
286
287 /**
288 * xmlCreateURI:
289 *
290 * Simply creates an empty xmlURI
291 *
292 * Returns the new structure or NULL in case of error
293 *
294 * OOM: possible --> returns NULL , sets OOM flag
295 */
296 XMLPUBFUNEXPORT xmlURIPtr
xmlCreateURI(void)297 xmlCreateURI(void) {
298 xmlURIPtr ret;
299
300 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
301 if (ret == NULL) {
302 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlCreateURI: out of memory\n"));
303 return(NULL);
304 }
305 memset(ret, 0, sizeof(xmlURI));
306 return(ret);
307 }
308
309 /**
310 * xmlSaveUri:
311 * @param uri pointer to an xmlURI
312 *
313 * Save the URI as an escaped string
314 *
315 * Returns a new string (to be deallocated by caller)
316 *
317 * OOM:
318 */
319 XMLPUBFUNEXPORT xmlChar*
xmlSaveUri(xmlURIPtr uri)320 xmlSaveUri(xmlURIPtr uri)
321 {
322 xmlChar* ret;// = NULL;
323 const char* p;
324 int len;
325 int max;
326 char ch;
327
328 if (!uri)
329 return(NULL);
330
331 max = 80;
332 ret = (xmlChar*) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
333 if (!ret)
334 goto OOM;
335
336 len = 0;
337
338 if (uri->scheme != NULL) {
339 p = uri->scheme;
340
341 while (*p != 0) {
342 if (len >= max) {
343 xmlChar* tmp;
344 max *= 2;
345 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
346 if (!tmp)
347 goto OOM;
348 ret = tmp;
349 }
350 ret[len++] = *p++;
351 }
352 if (len >= max) {
353 xmlChar* tmp;
354 max *= 2;
355 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
356 if (!tmp)
357 goto OOM;
358 ret = tmp;
359 }
360 ret[len++] = ':';
361 }
362 if (uri->opaque != NULL) {
363 p = uri->opaque;
364 while (*p != 0) {
365 if (len + 3 >= max) {
366 xmlChar* tmp;
367 max *= 2;
368 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
369 if (!tmp)
370 goto OOM;
371 ret = tmp;
372 }
373 if (isReserved(*p) || isUnreserved(*p))
374 {
375 ret[len++] = *p++;
376 }
377 else
378 {
379 int val = *(unsigned char*)p++;
380 int hi = val / 0x10;
381 int lo = val % 0x10;
382 ret[len++] = '%';
383 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
384 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
385 }
386 }
387 }
388 else
389 {
390 if (uri->server != NULL) {
391 if (len + 3 >= max) {
392 xmlChar* tmp;
393 max *= 2;
394 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
395 if (!tmp)
396 goto OOM;
397 ret = tmp;
398 }
399 ret[len++] = '/';
400 ret[len++] = '/';
401 if (uri->user != NULL) {
402 p = uri->user;
403
404 while (*p != 0) {
405 if (len + 3 >= max) {
406 xmlChar* tmp;
407 max *= 2;
408 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
409 if (!tmp)
410 goto OOM;
411 ret = tmp;
412 }
413 ch = *p;
414 if (isUnreserved(ch) ||
415 ch == ';' || ch == ':' ||
416 ch == '&' || ch == '=' ||
417 ch == '+' || ch == '$' ||
418 ch == ',')
419 {
420 ret[len++] = *p++;
421 }
422 else
423 {
424 int val = *(unsigned char *)p++;
425 int hi = val / 0x10, lo = val % 0x10;
426 ret[len++] = '%';
427 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
428 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
429 }
430 }
431 if (len + 3 >= max) {
432 xmlChar* tmp;
433 max *= 2;
434 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
435 if (!tmp)
436 goto OOM;
437 ret = tmp;
438 }
439 ret[len++] = '@';
440 }
441 p = uri->server;
442 while (*p != 0) {
443 if (len >= max) {
444 xmlChar* tmp;
445 max *= 2;
446 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
447 if (!tmp)
448 goto OOM;
449 ret = tmp;
450 }
451 ret[len++] = *p++;
452 }
453 if (uri->port > 0) {
454 if (len + 10 >= max) {
455 xmlChar* tmp;
456 max *= 2;
457 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
458 if (!tmp)
459 goto OOM;
460 ret = tmp;
461 }
462 len += snprintf((char*) &ret[len], max - len, ":%d", uri->port);
463 }
464 }
465 else if (uri->authority != NULL) {
466 if (len + 3 >= max) {
467 xmlChar* tmp;
468 max *= 2;
469 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
470 if (!tmp)
471 goto OOM;
472 ret = tmp;
473 }
474 ret[len++] = '/';
475 ret[len++] = '/';
476 p = uri->authority;
477 while (*p != 0) {
478 if (len + 3 >= max) {
479 xmlChar* tmp;
480 max *= 2;
481 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
482 if (!tmp)
483 goto OOM;
484 ret = tmp;
485 }
486 ch = *p;
487 if (isUnreserved(ch) ||
488 ch == '$' || ch == ',' || ch == ';' ||
489 ch == ':' || ch == '@' || ch == '&' ||
490 ch == '=' || ch == '+')
491 {
492 ret[len++] = *p++;
493 }
494 else
495 {
496 int val = *(unsigned char *)p++;
497 int hi = val / 0x10;
498 int lo = val % 0x10;
499 ret[len++] = '%';
500 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
501 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
502 }
503 }
504 }
505 else if (uri->scheme != NULL) {
506 if (len + 3 >= max) {
507 xmlChar* tmp;
508 max *= 2;
509 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
510 if (!tmp)
511 goto OOM;
512 ret = tmp;
513 }
514 ret[len++] = '/';
515 ret[len++] = '/';
516 }
517 if (uri->path != NULL) {
518 p = uri->path;
519 while (*p != 0) {
520 if (len + 3 >= max) {
521 xmlChar* tmp;
522 max *= 2;
523 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
524 if (!tmp)
525 goto OOM;
526 ret = tmp;
527 }
528 ch = *p;
529 if (isUnreserved(ch) || ch == '/' ||
530 ch == ';' || ch == '@' || ch == '&' ||
531 ch == '=' || ch == '+' || ch == '$' ||
532 ch == ',')
533 {
534 ret[len++] = *p++;
535 }
536 else
537 {
538 int val = *(unsigned char *)p++;
539 int hi = val / 0x10;
540 int lo = val % 0x10;
541 ret[len++] = '%';
542 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
543 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
544 }
545 }
546 }
547 if (uri->query != NULL) {
548
549 if (len + 3 >= max) {
550 xmlChar* tmp;
551 max *= 2;
552 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
553 if (!tmp)
554 goto OOM;
555 ret = tmp;
556 }
557 ret[len++] = '?';
558 p = uri->query;
559 while (*p != 0) {
560 if (len + 3 >= max) {
561 xmlChar* tmp;
562 max *= 2;
563 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
564 if (!tmp)
565 goto OOM;
566 ret = tmp;
567 }
568 if (isUnreserved(*p) || isReserved(*p))
569 {
570 ret[len++] = *p++;
571 }
572 else
573 {
574 int val = *(unsigned char *)p++;
575 int hi = val / 0x10;
576 int lo = val % 0x10;
577 ret[len++] = '%';
578 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
579 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
580 }
581 }
582 }
583 }
584 if (uri->fragment != NULL) {
585 if (len + 3 >= max) {
586 xmlChar* tmp;
587 max *= 2;
588 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
589 if (!tmp)
590 goto OOM;
591 ret = tmp;
592 }
593 ret[len++] = '#';
594 p = uri->fragment;
595
596 while (*p != 0) {
597 if (len + 3 >= max) {
598 xmlChar* tmp;
599 max *= 2;
600 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
601 if (!tmp)
602 goto OOM;
603 ret = tmp;
604 }
605 if (isUnreserved(*p) || isReserved(*p))
606 {
607 ret[len++] = *p++;
608 }
609 else
610 {
611 int val = *(unsigned char *)p++;
612 int hi = val / 0x10;
613 int lo = val % 0x10;
614 ret[len++] = '%';
615 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
616 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
617 }
618 }
619 }
620 if (len >= max) {
621 xmlChar* tmp;
622 max *= 2;
623 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc
624 if (!tmp)
625 goto OOM;
626 ret = tmp;
627 }
628 ret[len++] = 0;
629 return(ret);
630 //------------------------------
631 OOM:
632 if(ret)
633 xmlFree(ret);
634 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlSaveUri: out of memory\n"));
635 return(NULL);
636 }
637
638 #ifndef XMLENGINE_EXCLUDE_FILE_FUNC
639 /**
640 * xmlPrintURI:
641 * @param stream a FILE* for the output
642 * @param uri pointer to an xmlURI
643 *
644 * Prints the URI in the stream stream.
645 */
646 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)647 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
648 xmlChar *out;
649
650 out = xmlSaveUri(uri);
651 if (out != NULL) {
652 fprintf(stream, "%s", (char *) out);
653 xmlFree(out);
654 }
655 }
656 #endif
657
658 /**
659 * xmlCleanURI:
660 * @param uri pointer to an xmlURI
661 *
662 * Make sure the xmlURI struct is free of content
663 *
664 * OOM: never
665 */
666 static void
xmlCleanURI(xmlURIPtr uri)667 xmlCleanURI(xmlURIPtr uri) {
668 if (uri == NULL) return;
669
670 if (uri->scheme){
671 xmlFree(uri->scheme);
672 uri->scheme = NULL;
673 }
674 if (uri->server){
675 xmlFree(uri->server);
676 uri->server = NULL;
677 }
678 if (uri->user){
679 xmlFree(uri->user);
680 uri->user = NULL;
681 }
682 if (uri->path) {
683 xmlFree(uri->path);
684 uri->path = NULL;
685 }
686 if (uri->fragment) {
687 xmlFree(uri->fragment);
688 uri->fragment = NULL;
689 }
690 if (uri->opaque) {
691 xmlFree(uri->opaque);
692 uri->opaque = NULL;
693 }
694 if (uri->authority) {
695 xmlFree(uri->authority);
696 uri->authority = NULL;
697 }
698 if (uri->query) {
699 xmlFree(uri->query);
700 uri->query = NULL;
701 }
702 }
703
704 /**
705 * xmlFreeURI:
706 * @param uri pointer to an xmlURI
707 *
708 * Free up the xmlURI struct
709 */
710 XMLPUBFUNEXPORT void
xmlFreeURI(xmlURIPtr uri)711 xmlFreeURI(xmlURIPtr uri) {
712 if (uri == NULL) return;
713
714 if (uri->scheme) xmlFree(uri->scheme);
715 if (uri->server) xmlFree(uri->server);
716 if (uri->user ) xmlFree(uri->user);
717 if (uri->path ) xmlFree(uri->path);
718 if (uri->fragment) xmlFree(uri->fragment);
719 if (uri->opaque) xmlFree(uri->opaque);
720 if (uri->authority) xmlFree(uri->authority);
721 if (uri->query) xmlFree(uri->query);
722 xmlFree(uri);
723 }
724
725 /************************************************************************
726 * *
727 * Helper functions *
728 * *
729 ************************************************************************/
730
/**
 * xmlNormalizeURIPath:
 * @param path pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" inside the path are
 * collapsed to a single "/" as well.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, -1 if path is NULL
 */
XMLPUBFUNEXPORT int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * calling strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Look for the end of the previous segment, skipping the slashes
         * that separate it from "cur".
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path) {
            /* Either nothing but slashes precedes "cur" (path[0] is '/'),
             * in which case there is no previous segment and we keep going
             * from here, or the previous segment is the single character
             * at the very start of a relative path.  In the latter case we
             * must restart from it, otherwise e.g. "a/b/../.." would stop
             * at "a/.." instead of collapsing completely.
             */
            if (path[0] != '/')
                cur = path;
            continue;
        }

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths that start
     * with "/").
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
918
/* Returns 1 if c is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise. */
static int is_hex(char c) {
    return(((c >= '0') && (c <= '9')) ||
           ((c >= 'a') && (c <= 'f')) ||
           ((c >= 'A') && (c <= 'F')));
}
926
927 /**
928 * xmlURIUnescapeString:
929 * @param str the string to unescape
930 * @param len the length in bytes to unescape (or <= 0 to indicate full string)
931 * @param target optional destination buffer
932 *
933 * Unescaping routine, does not do validity checks !
934 * Output is direct unsigned char translation of %XX values (no encoding)
935 *
936 * Returns an copy of the string, but unescaped
937 *
938 * OOM: possible --> sets OOM when returns NULL for target==NULL
939 */
940 XMLPUBFUNEXPORT char*
xmlURIUnescapeString(const char * str,int len,char * target)941 xmlURIUnescapeString(const char* str, int len, char* target) {
942 char* ret;
943 char* out;
944 const char *in;
945
946 if (!str)
947 return(NULL);
948 if (len <= 0)
949 len = strlen(str);
950 if (len < 0)
951 return(NULL);
952
953 if (!target) {
954 ret = (char*) xmlMallocAtomic(len + 1); // may set OOM
955 if (!ret) {
956 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIUnescapeString: out of memory\n"));
957 return(NULL);
958 }
959 } else
960 ret = target;
961
962 in = str;
963 out = ret;
964 while(len > 0) {
965 if ((*in == '%') && (is_hex(in[1])) && (is_hex(in[2])))
966 {
967 in++;
968 if ((*in >= '0') && (*in <= '9'))
969 *out = (*in - '0');
970 else if ((*in >= 'a') && (*in <= 'f'))
971 *out = (*in - 'a') + 10;
972 else if ((*in >= 'A') && (*in <= 'F'))
973 *out = (*in - 'A') + 10;
974 in++;
975 if ((*in >= '0') && (*in <= '9'))
976 *out = *out * 16 + (*in - '0');
977 else if ((*in >= 'a') && (*in <= 'f'))
978 *out = *out * 16 + (*in - 'a') + 10;
979 else if ((*in >= 'A') && (*in <= 'F'))
980 *out = *out * 16 + (*in - 'A') + 10;
981 in++;
982 len -= 3;
983 out++;
984 } else {
985 *out++ = *in++;
986 len--;
987 }
988 }
989 *out = 0;
990 return(ret);
991 }
992
993 /**
994 * xmlURIEscapeStr:
995 * @param str string to escape
996 * @param list exception list string of chars not to escape
997 *
998 * This routine escapes a string to hex, ignoring reserved characters (a-z)
999 * and the characters in the exception list.
1000 *
1001 * Returns a new escaped string or NULL in case of error.
1002 */
1003 XMLPUBFUNEXPORT xmlChar*
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1004 xmlURIEscapeStr(const xmlChar* str, const xmlChar* list)
1005 {
1006 xmlChar* ret;
1007 xmlChar ch;
1008 const xmlChar *in;
1009
1010 unsigned int len, out;
1011
1012 if (!str)
1013 return(NULL);
1014 len = xmlStrlen(str);
1015 if (!(len > 0)) return(NULL);
1016
1017 len += 20;
1018 ret = (xmlChar*) xmlMallocAtomic(len);
1019 if (!ret)
1020 goto OOM;
1021
1022 in = (const xmlChar *) str;
1023 out = 0;
1024 while(*in != 0) {
1025 if (len - out <= 3) {
1026 xmlChar* tmp;
1027 len += 20;
1028 tmp = (xmlChar*) xmlRealloc(ret, len); // DONE: Fix xmlRealloc
1029 if (!tmp)
1030 goto OOM;
1031 ret = tmp;
1032 }
1033
1034 ch = *in;
1035
1036 if ((ch != '@') && (!isUnreserved(ch)) && (!xmlStrchr(list, ch))) {
1037 unsigned char val;
1038 ret[out++] = '%';
1039 val = ch >> 4;
1040 if (val <= 9)
1041 ret[out++] = '0' + val;
1042 else
1043 ret[out++] = 'A' + val - 0xA;
1044 val = ch & 0xF;
1045 if (val <= 9)
1046 ret[out++] = '0' + val;
1047 else
1048 ret[out++] = 'A' + val - 0xA;
1049 in++;
1050 } else {
1051 ret[out++] = *in++;
1052 }
1053 } // while(*in != 0)
1054 ret[out] = 0;
1055 return(ret);
1056 //---------------------
1057 OOM:
1058 if(ret)
1059 xmlFree(ret);
1060 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIEscapeStr: out of memory\n"));
1061 return(NULL);
1062 }
1063
1064 /**
1065 * xmlURIEscape:
1066 * @param str the string of the URI to escape
1067 *
1068 * Escaping routine, does not do validity checks !
1069 * It will try to escape the chars needing this, but this is heuristic
1070 * based it's impossible to be sure.
1071 *
1072 * Returns an copy of the string, but escaped
1073 *
1074 * 25 May 2001
1075 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1076 * according to RFC2396.
1077 * - Carl Douglas
1078 */
1079 XMLPUBFUNEXPORT xmlChar*
xmlURIEscape(const xmlChar * str)1080 xmlURIEscape(const xmlChar * str)
1081 {
1082 xmlChar* ret;
1083 xmlChar* segment;
1084 xmlURIPtr uri;
1085 int ret2;
1086
1087 #define NULLCHK(p) if(!p) goto OOM
1088
1089
1090 if (str == NULL)
1091 return (NULL);
1092
1093 uri = xmlCreateURI();
1094 if (uri != NULL) {
1095 /*
1096 * Allow escaping errors in the unescaped form
1097 */
1098 uri->cleanup = 1;
1099 ret2 = xmlParseURIReference(uri, (const char *)str);
1100 if (ret2) {
1101 xmlFreeURI(uri);
1102 return (NULL);
1103 }
1104 }
1105
1106 if (!uri)
1107 return NULL;
1108
1109 ret = NULL;
1110
1111 if (uri->scheme) {
1112 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1113 NULLCHK(segment);
1114 ret = xmlStrcat(ret, segment);
1115 ret = xmlStrcat(ret, BAD_CAST ":");
1116 xmlFree(segment);
1117 }
1118
1119 if (uri->authority) {
1120 segment = xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1121 NULLCHK(segment);
1122 ret = xmlStrcat(ret, BAD_CAST "//");
1123 ret = xmlStrcat(ret, segment);
1124 xmlFree(segment);
1125 }
1126
1127 if (uri->user) {
1128 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1129 NULLCHK(segment);
1130 ret = xmlStrcat(ret,BAD_CAST "//");
1131 ret = xmlStrcat(ret, segment);
1132 ret = xmlStrcat(ret, BAD_CAST "@");
1133 xmlFree(segment);
1134 }
1135
1136 if (uri->server) {
1137 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1138 NULLCHK(segment);
1139 if (uri->user == NULL)
1140 ret = xmlStrcat(ret, BAD_CAST "//");
1141 ret = xmlStrcat(ret, segment);
1142 xmlFree(segment);
1143 }
1144
1145 if (uri->port) {
1146 xmlChar port[10];
1147
1148 snprintf((char *) port, 10, "%d", uri->port);
1149 ret = xmlStrcat(ret, BAD_CAST ":");
1150 ret = xmlStrcat(ret, port);
1151 }
1152
1153 if (uri->path) {
1154 segment = xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1155 NULLCHK(segment);
1156 ret = xmlStrcat(ret, segment);
1157 xmlFree(segment);
1158 }
1159
1160 if (uri->query) {
1161 segment = xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1162 NULLCHK(segment);
1163 ret = xmlStrcat(ret, BAD_CAST "?");
1164 ret = xmlStrcat(ret, segment);
1165 xmlFree(segment);
1166 }
1167
1168 if (uri->opaque) {
1169 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1170 NULLCHK(segment);
1171 ret = xmlStrcat(ret, segment);
1172 xmlFree(segment);
1173 }
1174
1175 if (uri->fragment) {
1176 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1177 NULLCHK(segment);
1178 ret = xmlStrcat(ret, BAD_CAST "#");
1179 ret = xmlStrcat(ret, segment);
1180 xmlFree(segment);
1181 }
1182
1183 xmlFreeURI(uri);
1184 #undef NULLCHK
1185
1186 return (ret);
1187 //--------------------
1188 OOM:
1189 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIEscape: out of memory\n"));
1190 xmlFreeURI(uri);
1191 if(ret) xmlFree(ret);
1192 return NULL;
1193 }
1194
1195 /************************************************************************
1196 * *
1197 * Escaped URI parsing *
1198 * *
1199 ************************************************************************/
1200
1201 /**
1202 * xmlParseURIFragment:
1203 * @param uri pointer to an URI structure
1204 * @param str pointer to the string to analyze
1205 *
1206 * Parse an URI fragment string and fills in the appropriate fields
1207 * of the uri structure.
1208 *
1209 * fragment = *uric
1210 *
1211 * Returns 0 or the error code
1212 *
1213 * OOM: possible --> returns XML_ERR_NO_MEMORY and sets OOM flag
1214 */
1215 static int
xmlParseURIFragment(xmlURIPtr uri,const char ** str)1216 xmlParseURIFragment(xmlURIPtr uri, const char **str)
1217 {
1218 const char *cur = *str;
1219
1220 if (str == NULL)
1221 return (-1);
1222
1223 while (isUriC(cur) || isUnwise(*cur))
1224 NEXT(cur);
1225
1226 if (uri != NULL) {
1227 if (uri->fragment != NULL)
1228 xmlFree(uri->fragment);
1229 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1230 if(!uri->fragment)
1231 return XML_ERR_NO_MEMORY;
1232 }
1233 *str = cur;
1234 return (0);
1235 }
1236
1237 /**
1238 * xmlParseURIQuery:
1239 * @param uri pointer to an URI structure
1240 * @param str pointer to the string to analyze
1241 *
1242 * Parse the query part of an URI
1243 *
1244 * query = *uric
1245 *
1246 * Returns 0 or the error code
1247 *
1248 * OOM: possible for uri!=NULL --> returns XML_ERR_NO_MEMORY and sets OOM flag
1249 */
1250 static int
xmlParseURIQuery(xmlURIPtr uri,const char ** str)1251 xmlParseURIQuery(xmlURIPtr uri, const char **str)
1252 {
1253 const char *cur = *str;
1254
1255 if (str == NULL)
1256 return (-1);
1257
1258 while (isUriC(cur) || (uri && uri->cleanup && isUnwise(*cur)))
1259 NEXT(cur);
1260 if (uri != NULL) {
1261 if (uri->query != NULL)
1262 xmlFree(uri->query);
1263 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1264 if(!uri->query)
1265 return XML_ERR_NO_MEMORY;
1266 }
1267 *str = cur;
1268 return (0);
1269 }
1270
1271 /**
1272 * xmlParseURIScheme:
1273 * @param uri pointer to an URI structure
1274 * @param str pointer to the string to analyze
1275 *
1276 * Parse an URI scheme
1277 *
1278 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1279 *
1280 * Returns 0 or the error code
1281 *
1282 * OOM: possible --> sets OOM flag returns XML_ERR_NO_MEMORY
1283 */
1284 static int
xmlParseURIScheme(xmlURIPtr uri,const char ** str)1285 xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1286 const char *cur;
1287
1288 if (str == NULL)
1289 return(-1);
1290
1291 cur = *str;
1292 if (!IS_ALPHA(*cur))
1293 return(1); // was 2 which is XML_ERR_NO_MEMORY
1294 cur++;
1295 while (IS_SCHEME(*cur))
1296 cur++;
1297 if (uri != NULL) {
1298 if (uri->scheme)
1299 xmlFree(uri->scheme);
1300 /* !!! strndup */
1301 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM, when returns NULL
1302 if(!uri->scheme)
1303 return XML_ERR_NO_MEMORY;
1304 }
1305 *str = cur;
1306 return(0);
1307 }
1308
1309 /**
1310 * xmlParseURIOpaquePart:
1311 * @param uri pointer to an URI structure
1312 * @param str pointer to the string to analyze
1313 *
1314 * Parse an URI opaque part
1315 *
1316 * opaque_part = uric_no_slash *uric
1317 *
1318 * Returns 0 or the error code
1319 *
1320 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1321 */
1322 static int
xmlParseURIOpaquePart(xmlURIPtr uri,const char ** str)1323 xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1324 {
1325 const char* cur;
1326
1327 if (str == NULL)
1328 return (-1);
1329
1330 cur = *str;
1331 if (!(isUriCNoSlash(cur) ||
1332 ( (uri != NULL) && (uri->cleanup) && (isUnwise(*cur)) )
1333 ))
1334 {
1335 return (3);
1336 }
1337 NEXT(cur);
1338 while (isUriC(cur) || ((uri != NULL) && (uri->cleanup) && (isUnwise(*cur))))
1339 {
1340 NEXT(cur);
1341 }
1342 if (uri != NULL) {
1343 if (uri->opaque != NULL)
1344 xmlFree(uri->opaque);
1345 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1346 if(!uri->opaque)
1347 return XML_ERR_NO_MEMORY;
1348 }
1349 *str = cur;
1350 return (0);
1351 }
1352
1353 /**
1354 * xmlParseURIServer:
1355 * @param uri pointer to an URI structure
1356 * @param str pointer to the string to analyze
1357 *
1358 * Parse a server subpart of an URI, it's a finer grain analysis
1359 * of the authority part.
1360 *
1361 * server = [ [ userinfo "@" ] hostport ]
1362 * userinfo = *( unreserved | escaped |
1363 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1364 * hostport = host [ ":" port ]
1365 * host = hostname | IPv4address
1366 * hostname = *( domainlabel "." ) toplabel [ "." ]
1367 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1368 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1369 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1370 * port = *digit
1371 *
1372 * Returns 0 or the error code
1373 *
1374 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1375 */
1376 static int
xmlParseURIServer(xmlURIPtr uri,const char ** str)1377 xmlParseURIServer(xmlURIPtr uri, const char **str) {
1378 const char* cur;
1379 const char* host;
1380 const char* tmp;
1381 const int IPmax = 4;
1382 int oct;
1383
1384 if (str == NULL)
1385 return(-1);
1386
1387 cur = *str;
1388
1389 /*
1390 * is there an userinfo ?
1391 */
1392 while (isUserInfo(cur))
1393 {
1394 NEXT(cur);
1395 }
1396
1397 if (*cur == '@')
1398 {
1399 if (uri != NULL) {
1400 if (uri->user != NULL)
1401 xmlFree(uri->user);
1402 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1403 if(!uri->user)
1404 return XML_ERR_NO_MEMORY;
1405 }
1406 cur++;
1407 } else {
1408 if (uri != NULL) {
1409 if (uri->user != NULL)
1410 xmlFree(uri->user);
1411 uri->user = NULL;
1412 }
1413 cur = *str;
1414 }
1415 /*
1416 * This can be empty in the case where there is no server
1417 */
1418 host = cur;
1419 if (*cur == '/') {
1420 if (uri != NULL) {
1421 if (uri->authority != NULL)
1422 xmlFree(uri->authority);
1423 uri->authority = NULL;
1424 if (uri->server != NULL)
1425 xmlFree(uri->server);
1426 uri->server = NULL;
1427 uri->port = 0;
1428 }
1429 return(0);
1430 }
1431 /*
1432 * host part of hostport can derive either an IPV4 address
1433 * or an unresolved name. Check the IP first, it easier to detect
1434 * errors if wrong one
1435 */
1436 for (oct = 0; oct < IPmax; ++oct) {
1437 if (*cur == '.')
1438 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1439 while(IS_DIGIT(*cur))
1440 cur++;
1441 if (oct == (IPmax-1))
1442 continue;
1443 if (*cur != '.')
1444 break;
1445 cur++;
1446 }
1447 if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) {
1448 /* maybe host_name */
1449 if (!IS_ALPHANUM(*cur))
1450 return(4); /* e.g. http://xml.$oft */
1451 do {
1452 do ++cur; while (IS_ALPHANUM(*cur));
1453 if (*cur == '-') {
1454 --cur;
1455 if (*cur == '.')
1456 return(5); /* e.g. http://xml.-soft */
1457 ++cur;
1458 continue;
1459 }
1460 if (*cur == '.') {
1461 --cur;
1462 if (*cur == '-')
1463 return(6); /* e.g. http://xml-.soft */
1464 if (*cur == '.')
1465 return(7); /* e.g. http://xml..soft */
1466 ++cur;
1467 continue;
1468 }
1469 break;
1470 } while (1);
1471
1472 tmp = cur;
1473 if (tmp[-1] == '.')
1474 --tmp; /* e.g. http://xml.$Oft/ */
1475 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1476 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1477 return(8); /* e.g. http://xmlsOft.0rg/ */
1478 }
1479 if (uri != NULL) {
1480 if (uri->authority != NULL)
1481 xmlFree(uri->authority);
1482 uri->authority = NULL;
1483 if (uri->server != NULL)
1484 xmlFree(uri->server);
1485 uri->server = xmlURIUnescapeString(host, cur - host, NULL); // may set OOM flag
1486 if(!uri->server)
1487 return XML_ERR_NO_MEMORY;
1488 }
1489 /*
1490 * finish by checking for a port presence.
1491 */
1492 if (*cur == ':') {
1493 cur++;
1494 if (IS_DIGIT(*cur)) {
1495 if (uri != NULL)
1496 uri->port = 0;
1497 while (IS_DIGIT(*cur)) {
1498 if (uri != NULL)
1499 uri->port = uri->port * 10 + (*cur - '0');
1500 cur++;
1501 }
1502 }
1503 }
1504 *str = cur;
1505 return(0);
1506 }
1507
1508 /**
1509 * xmlParseURIRelSegment:
1510 * @param uri pointer to an URI structure
1511 * @param str pointer to the string to analyze
1512 *
1513 * Parse an URI relative segment
1514 *
1515 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1516 * "+" | "$" | "," )
1517 *
1518 * Returns 0 or the error code
1519 *
1520 * OOM: possible --> sets OOM and returns XML_ERR_NO_MEMORY
1521 */
1522 static int
xmlParseURIRelSegment(xmlURIPtr uri,const char ** str)1523 xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1524 {
1525 const char *cur;
1526
1527 if (str == NULL)
1528 return (-1);
1529
1530 cur = *str;
1531 if (!(isSegment(cur) || ((uri) && (uri->cleanup) && (isUnwise(*cur))))) {
1532 return (3);
1533 }
1534 NEXT(cur);
1535 while (isSegment(cur) || ((uri) && (uri->cleanup) && (isUnwise(*cur))))
1536 NEXT(cur);
1537 if (uri != NULL) {
1538 if (uri->path != NULL)
1539 xmlFree(uri->path);
1540 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag
1541 if(!uri->path)
1542 return XML_ERR_NO_MEMORY;
1543 }
1544 *str = cur;
1545 return (0);
1546 }
1547
1548 /**
1549 * xmlParseURIPathSegments:
1550 * @param uri pointer to an URI structure
1551 * @param str pointer to the string to analyze
1552 * @param slash should we add a leading slash
1553 *
1554 * Parse an URI set of path segments
1555 *
1556 * path_segments = segment *( "/" segment )
1557 * segment = *pchar *( ";" param )
1558 * param = *pchar
1559 *
1560 * Returns 0 or the error code
1561 *
1562 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1563 */
1564 static int
xmlParseURIPathSegments(xmlURIPtr uri,const char ** str,int slash)1565 xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1566 {
1567 const char *cur;
1568
1569 if (str == NULL)
1570 return (-1);
1571
1572 cur = *str;
1573 // XMLENGINE: BEGIN REPLACE
1574 for(;;) {
1575 // Replace while(1) for infinite loop
1576 // do {
1577 // XMLENGINE: END REPLACE
1578
1579 while (isPChar(cur) || (uri && uri->cleanup && (isUnwise(*cur))))
1580 {
1581 NEXT(cur);
1582 }
1583 while (*cur == ';') {
1584 cur++;
1585 while (isPChar(cur) || ((uri != NULL) && (uri->cleanup) && (isUnwise(*cur))))
1586 NEXT(cur);
1587 }
1588 if (*cur != '/')
1589 break;
1590 cur++;
1591 // XMLENGINE: BEGIN REPLACE
1592 }
1593 //} while (1);
1594 // XMLENGINE: END REPLACE
1595 if (uri != NULL) {
1596 int len, len2 = 0;
1597 char *path;
1598
1599 /*
1600 * Concat the set of path segments to the current path
1601 */
1602 len = cur - *str;
1603 if (slash)
1604 len++;
1605
1606 if (uri->path != NULL) {
1607 len2 = strlen(uri->path);
1608 len += len2;
1609 }
1610 path = (char *) xmlMallocAtomic(len + 1); // may set OOM flag
1611 if (path == NULL) {
1612 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlParseURIPathSegments: out of memory\n"));
1613 *str = cur;
1614 //return (-1);
1615 return XML_ERR_NO_MEMORY;
1616 }
1617 if (uri->path != NULL)
1618 memcpy(path, uri->path, len2);
1619 if (slash) {
1620 path[len2] = '/';
1621 len2++;
1622 }
1623 path[len2] = 0;
1624 if (cur - *str > 0)
1625 xmlURIUnescapeString(*str, cur - *str, &path[len2]); // MAY NOT set OOM flag !!!
1626 if (uri->path != NULL)
1627 xmlFree(uri->path);
1628 uri->path = path;
1629 }
1630 *str = cur;
1631 return (0);
1632 }
1633
1634 /**
1635 * xmlParseURIAuthority:
1636 * @param uri pointer to an URI structure
1637 * @param str pointer to the string to analyze
1638 *
1639 * Parse the authority part of an URI.
1640 *
1641 * authority = server | reg_name
1642 * server = [ [ userinfo "@" ] hostport ]
1643 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1644 * "@" | "&" | "=" | "+" )
1645 *
1646 * Note : this is completely ambiguous since reg_name is allowed to
1647 * use the full set of chars in use by server:
1648 *
1649 * 3.2.1. Registry-based Naming Authority
1650 *
1651 * The structure of a registry-based naming authority is specific
1652 * to the URI scheme, but constrained to the allowed characters
1653 * for an authority component.
1654 *
1655 * Returns 0 or the error code
1656 *
1657 * OOM: possible --> OOM flag is set, returns XML_ERR_NO_MEMORY
1658 */
1659 static int
xmlParseURIAuthority(xmlURIPtr uri,const char ** str)1660 xmlParseURIAuthority(xmlURIPtr uri, const char** str) {
1661 const char *cur;
1662 int ret;
1663
1664 if (str == NULL)
1665 return(-1);
1666
1667 cur = *str;
1668
1669 /*
1670 * try first to parse it as a server string.
1671 */
1672 ret = xmlParseURIServer(uri, str); // may set OOM flag -- returns XML_ERR_NO_MEMORY
1673 if ((ret == 0) && (*str != NULL) &&
1674 ((**str == 0) || (**str == '/') || (**str == '?')))
1675 return(0);
1676 if(ret!=0)
1677 return ret; // error happened, maybe OOM
1678 *str = cur;
1679
1680 /*
1681 * failed, fallback to reg_name
1682 */
1683 if (!isRegName(cur)) {
1684 return(5);
1685 }
1686 NEXT(cur);
1687 while (isRegName(cur))
1688 NEXT(cur);
1689
1690 if (uri != NULL) {
1691 if (uri->server != NULL)
1692 xmlFree(uri->server);
1693 uri->server = NULL;
1694 if (uri->user != NULL)
1695 xmlFree(uri->user);
1696 uri->user = NULL;
1697 if (uri->authority != NULL)
1698 xmlFree(uri->authority);
1699 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM
1700 if(!uri->authority)
1701 return XML_ERR_NO_MEMORY;
1702 }
1703 *str = cur;
1704 return(0);
1705 }
1706
1707 /**
1708 * xmlParseURIHierPart:
1709 * @param uri pointer to an URI structure
1710 * @param str pointer to the string to analyze
1711 *
1712 * Parse an URI hierarchical part
1713 *
1714 * hier_part = ( net_path | abs_path ) [ "?" query ]
1715 * abs_path = "/" path_segments
1716 * net_path = "//" authority [ abs_path ]
1717 *
1718 * Returns 0 or the error code
1719 *
1720 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1721 */
1722 static int
xmlParseURIHierPart(xmlURIPtr uri,const char ** str)1723 xmlParseURIHierPart(xmlURIPtr uri, const char** str) {
1724 int ret;
1725 const char* cur;
1726
1727 if (!str)
1728 return(-1);
1729
1730 cur = *str;
1731
1732 if ((cur[0] == '/') && (cur[1] == '/'))
1733 {
1734 cur += 2;
1735 ret = xmlParseURIAuthority(uri, &cur); // may set OOM flag and return XML_ERR_NO_MEMORY
1736 if (ret != 0)
1737 return(ret);
1738
1739 if (cur[0] == '/') {
1740 cur++;
1741 ret = xmlParseURIPathSegments(uri, &cur, 1);
1742 }
1743 } else if (cur[0] == '/') {
1744 cur++;
1745 ret = xmlParseURIPathSegments(uri, &cur, 1);
1746 } else {
1747 return(4);
1748 }
1749 if (ret != 0)
1750 return(ret); // there was error, maybe OOM
1751
1752 if (*cur == '?') {
1753 cur++;
1754 ret = xmlParseURIQuery(uri, &cur); // may set OOM flag
1755 if (ret != 0)
1756 return(ret);
1757 }
1758 *str = cur;
1759 return(0);
1760 }
1761
1762 /**
1763 * xmlParseAbsoluteURI:
1764 * @param uri pointer to an URI structure
1765 * @param str pointer to the string to analyze
1766 *
1767 * Parse an URI reference string and fills in the appropriate fields
1768 * of the uri structure
1769 *
1770 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1771 *
1772 * Returns 0 or the error code
1773 *
1774 * OOM: possible --> sets OOM and returns XML_ERR_NO_MEMORY
1775 */
1776 static int
xmlParseAbsoluteURI(xmlURIPtr uri,const char ** str)1777 xmlParseAbsoluteURI(xmlURIPtr uri, const char** str) {
1778 int ret;
1779 const char* cur;
1780
1781 if (str == NULL)
1782 return(-1);
1783
1784 cur = *str;
1785
1786 ret = xmlParseURIScheme(uri, str); // may set OOM and return XML_ERR_NO_MEMORY
1787 if (ret != 0) return(ret);
1788
1789 if (**str != ':') {
1790 *str = cur;
1791 return(1);
1792 }
1793 (*str)++;
1794 if (**str == '/')
1795 return(xmlParseURIHierPart(uri, str)); // may set OOM flag
1796 return(xmlParseURIOpaquePart(uri, str)); //may set OOM flag
1797 }
1798
1799 /**
1800 * xmlParseRelativeURI:
1801 * @param uri pointer to an URI structure
1802 * @param str pointer to the string to analyze
1803 *
1804 * Parse an relative URI string and fills in the appropriate fields
1805 * of the uri structure
1806 *
1807 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1808 * abs_path = "/" path_segments
1809 * net_path = "//" authority [ abs_path ]
1810 * rel_path = rel_segment [ abs_path ]
1811 *
1812 * Returns 0 or the error code
1813 *
1814 * OOM: possible --> sets OOM flag and return XML_ERR_NO_MEMORY
1815 */
1816 static int
xmlParseRelativeURI(xmlURIPtr uri,const char ** str)1817 xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1818 int ret = 0;
1819 const char *cur;
1820
1821 if (str == NULL)
1822 return(-1);
1823
1824 cur = *str;
1825 if ((cur[0] == '/') && (cur[1] == '/')) {
1826 cur += 2;
1827
1828 ret = xmlParseURIAuthority(uri, &cur); // may set OOM flag
1829 if (ret != 0)
1830 return(ret);
1831 if (cur[0] == '/') {
1832 cur++;
1833 ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag
1834 }
1835 } else if (cur[0] == '/') {
1836 cur++;
1837 ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag
1838 } else if (cur[0] != '#' && cur[0] != '?') {
1839 ret = xmlParseURIRelSegment(uri, &cur); // may set OOM flag
1840 if (ret != 0)
1841 return(ret);
1842 if (cur[0] == '/') {
1843 cur++;
1844 ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag
1845 }
1846 }
1847 if (ret != 0)
1848 return(ret);
1849 if (*cur == '?') {
1850 cur++;
1851 ret = xmlParseURIQuery(uri, &cur); // may set OOM flag
1852 if (ret != 0)
1853 return(ret);
1854 }
1855 *str = cur;
1856 return(ret);
1857 }
1858
1859 /**
1860 * xmlParseURIReference:
1861 * @param uri pointer to an URI structure
1862 * @param str the string to analyze
1863 *
1864 * Parse an URI reference string and fills in the appropriate fields
1865 * of the uri structure
1866 *
1867 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1868 *
1869 * Returns 0 or the error code
1870 *
1871 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY
1872 */
1873 XMLPUBFUNEXPORT int
xmlParseURIReference(xmlURIPtr uri,const char * str)1874 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1875 int ret;
1876 const char *tmp = str;
1877
1878 if (!str)
1879 return(-1);
1880 xmlCleanURI(uri);
1881
1882 /*
1883 * Try first to parse absolute refs, then fallback to relative if
1884 * it fails.
1885 */
1886 ret = xmlParseAbsoluteURI(uri, &str); // may set OOM flag
1887 if (ret == XML_ERR_NO_MEMORY)
1888 return ret;
1889
1890 if (ret != 0) { // No, it is not an absolute URI, try it as a relative one...
1891 xmlCleanURI(uri);
1892 str = tmp;
1893 ret = xmlParseRelativeURI(uri, &str); // may set OOM flag
1894 if (ret == XML_ERR_NO_MEMORY)
1895 return ret;
1896 }
1897
1898 if (ret != 0) {
1899 xmlCleanURI(uri);
1900 return(ret);
1901 }
1902
1903 if (*str == '#') {
1904 str++;
1905 ret = xmlParseURIFragment(uri, &str); // may set OOM flag
1906 if (ret != 0) return(ret);
1907 }
1908 if (*str != 0) {
1909 xmlCleanURI(uri);
1910 return(1);
1911 }
1912 return(0);
1913 }
1914
1915 /**
1916 * xmlParseURI:
1917 * @param str the URI string to analyze
1918 *
1919 * Parse an URI
1920 *
1921 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1922 *
1923 * Returns a newly build xmlURIPtr or NULL in case of error
1924 *
1925 * OOM: possible --> returns NULL for uri!=NULL
1926 */
1927 XMLPUBFUNEXPORT xmlURIPtr
xmlParseURI(const char * str)1928 xmlParseURI(const char *str)
1929 {
1930 xmlURIPtr uri;
1931 int ret;
1932
1933 if (!str)
1934 return(NULL);
1935 uri = xmlCreateURI();
1936 if (uri) {
1937 ret = xmlParseURIReference(uri, str);
1938 if (ret) {
1939 xmlCleanURI(uri);
1940 xmlFreeURI(uri);
1941 return(NULL);
1942 }
1943 }
1944 return(uri);
1945 }
1946
/************************************************************************
 *                                                                      *
 *                      Public functions                                *
 *                                                                      *
 ************************************************************************/
1952
1953 /**
1954 * xmlBuildURI:
1955 * @param URI the URI instance found in the document
1956 * @param base the base value
1957 *
1958 * Computes he final URI of the reference done by checking that
1959 * the given URI is valid, and building the final URI using the
1960 * base URI. This is processed according to section 5.2 of the
1961 * RFC 2396
1962 *
1963 * 5.2. Resolving Relative References to Absolute Form
1964 *
1965 * Returns a new URI string (to be freed by the caller) or NULL in case
1966 * of error.
1967 *
1968 * OOM:
1969 */
1970 XMLPUBFUNEXPORT xmlChar*
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1971 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1972 xmlChar *val = NULL;
1973 int ret, len, indx, cur, out;
1974 xmlURIPtr ref = NULL;
1975 xmlURIPtr bas = NULL;
1976 xmlURIPtr res = NULL;
1977
1978 /*
1979 * 1) The URI reference is parsed into the potential four components and
1980 * fragment identifier, as described in Section 4.3.
1981 *
1982 * NOTE that a completely empty URI is treated by modern browsers
1983 * as a reference to "." rather than as a synonym for the current
1984 * URI. Should we do that here?
1985 */
1986 if (!URI)
1987 ret = -1;
1988 else {
1989 if (*URI) {
1990 ref = xmlCreateURI();
1991 if (ref == NULL)
1992 goto done;
1993 ret = xmlParseURIReference(ref, (const char*) URI);
1994 }
1995 else
1996 ret = 0;
1997 }
1998 if (ret != 0)
1999 goto done;
2000 if ((ref != NULL) && (ref->scheme != NULL)) {
2001 /*
2002 * The URI is absolute don't modify.
2003 */
2004 val = xmlStrdup(URI);
2005 goto done;
2006 }
2007 if (base == NULL)
2008 ret = -1;
2009 else {
2010 bas = xmlCreateURI();
2011 if (bas == NULL)
2012 goto done;
2013 ret = xmlParseURIReference(bas, (const char *) base);
2014 }
2015 if (ret != 0) {
2016 if (ref)
2017 val = xmlSaveUri(ref);
2018 goto done;
2019 }
2020 if (ref == NULL) {
2021 /*
2022 * the base fragment must be ignored
2023 */
2024 if (bas->fragment != NULL) {
2025 xmlFree(bas->fragment);
2026 bas->fragment = NULL;
2027 }
2028 val = xmlSaveUri(bas);
2029 goto done;
2030 }
2031
2032 /*
2033 * 2) If the path component is empty and the scheme, authority, and
2034 * query components are undefined, then it is a reference to the
2035 * current document and we are done. Otherwise, the reference URI's
2036 * query and fragment components are defined as found (or not found)
2037 * within the URI reference and not inherited from the base URI.
2038 *
2039 * NOTE that in modern browsers, the parsing differs from the above
2040 * in the following aspect: the query component is allowed to be
2041 * defined while still treating this as a reference to the current
2042 * document.
2043 */
2044 res = xmlCreateURI();
2045 if (res == NULL)
2046 goto done;
2047 if ((ref->scheme == NULL) &&
2048 (ref->path == NULL) &&
2049 (ref->authority == NULL)&&
2050 (ref->server == NULL))
2051 {
2052 if (bas->scheme != NULL)
2053 res->scheme = xmlMemStrdup(bas->scheme);
2054 if (bas->authority != NULL)
2055 res->authority = xmlMemStrdup(bas->authority);
2056 else if (bas->server != NULL) {
2057 res->server = xmlMemStrdup(bas->server);
2058 if (bas->user != NULL)
2059 res->user = xmlMemStrdup(bas->user);
2060 res->port = bas->port;
2061 }
2062 if (bas->path != NULL)
2063 res->path = xmlMemStrdup(bas->path);
2064 if (ref->query != NULL)
2065 res->query = xmlMemStrdup(ref->query);
2066 else if (bas->query != NULL)
2067 res->query = xmlMemStrdup(bas->query);
2068 if (ref->fragment != NULL)
2069 res->fragment = xmlMemStrdup(ref->fragment);
2070 goto step_7;
2071 }
2072
2073 /*
2074 * 3) If the scheme component is defined, indicating that the reference
2075 * starts with a scheme name, then the reference is interpreted as an
2076 * absolute URI and we are done. Otherwise, the reference URI's
2077 * scheme is inherited from the base URI's scheme component.
2078 */
2079 if (ref->scheme != NULL) {
2080 val = xmlSaveUri(ref);
2081 goto done;
2082 }
2083 if (bas->scheme != NULL)
2084 res->scheme = xmlMemStrdup(bas->scheme);
2085
2086 if (ref->query != NULL)
2087 res->query = xmlMemStrdup(ref->query);
2088 if (ref->fragment != NULL)
2089 res->fragment = xmlMemStrdup(ref->fragment);
2090
2091 /*
2092 * 4) If the authority component is defined, then the reference is a
2093 * network-path and we skip to step 7. Otherwise, the reference
2094 * URI's authority is inherited from the base URI's authority
2095 * component, which will also be undefined if the URI scheme does not
2096 * use an authority component.
2097 */
2098 if ((ref->authority != NULL) || (ref->server != NULL)) {
2099 if (ref->authority != NULL)
2100 res->authority = xmlMemStrdup(ref->authority);
2101 else {
2102 res->server = xmlMemStrdup(ref->server);
2103 if (ref->user != NULL)
2104 res->user = xmlMemStrdup(ref->user);
2105 res->port = ref->port;
2106 }
2107 if (ref->path != NULL)
2108 res->path = xmlMemStrdup(ref->path);
2109 goto step_7;
2110 }
2111 if (bas->authority != NULL)
2112 res->authority = xmlMemStrdup(bas->authority);
2113 else if (bas->server != NULL) {
2114 res->server = xmlMemStrdup(bas->server);
2115 if (bas->user != NULL)
2116 res->user = xmlMemStrdup(bas->user);
2117 res->port = bas->port;
2118 }
2119
2120 /*
2121 * 5) If the path component begins with a slash character ("/"), then
2122 * the reference is an absolute-path and we skip to step 7.
2123 */
2124 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2125 res->path = xmlMemStrdup(ref->path);
2126 goto step_7;
2127 }
2128
2129
2130 /*
2131 * 6) If this step is reached, then we are resolving a relative-path
2132 * reference. The relative path needs to be merged with the base
2133 * URI's path. Although there are many ways to do this, we will
2134 * describe a simple method using a separate string buffer.
2135 *
2136 * Allocate a buffer large enough for the result string.
2137 */
2138 len = 2; /* extra / and 0 */
2139 if (ref->path != NULL)
2140 len += strlen(ref->path);
2141 if (bas->path != NULL)
2142 len += strlen(bas->path);
2143 res->path = (char *) xmlMallocAtomic(len);
2144 if (res->path == NULL) {
2145 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlBuildURI: out of memory\n"));
2146 goto done;
2147 }
2148 res->path[0] = 0;
2149
2150 /*
2151 * a) All but the last segment of the base URI's path component is
2152 * copied to the buffer. In other words, any characters after the
2153 * last (right-most) slash character, if any, are excluded.
2154 */
2155 cur = 0;
2156 out = 0;
2157 if (bas->path != NULL) {
2158 while (bas->path[cur] != 0) {
2159 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2160 cur++;
2161 if (bas->path[cur] == 0)
2162 break;
2163
2164 cur++;
2165 while (out < cur) {
2166 res->path[out] = bas->path[out];
2167 out++;
2168 }
2169 }
2170 }
2171 res->path[out] = 0;
2172
2173 /*
2174 * b) The reference's path component is appended to the buffer
2175 * string.
2176 */
2177 if (ref->path != NULL && ref->path[0] != 0) {
2178 indx = 0;
2179 /*
2180 * Ensure the path includes a '/'
2181 */
2182 if ((out == 0) && (bas->server != NULL))
2183 res->path[out++] = '/';
2184 while (ref->path[indx] != 0) {
2185 res->path[out++] = ref->path[indx++];
2186 }
2187 }
2188 res->path[out] = 0;
2189
2190 /*
2191 * Steps c) to h) are really path normalization steps
2192 */
2193 xmlNormalizeURIPath(res->path);
2194
2195 step_7:
2196
2197 /*
2198 * 7) The resulting URI components, including any inherited from the
2199 * base URI, are recombined to give the absolute form of the URI
2200 * reference.
2201 */
2202 val = xmlSaveUri(res);
2203
2204 done:
2205 if (ref)
2206 xmlFreeURI(ref);
2207 if (bas)
2208 xmlFreeURI(bas);
2209 if (res)
2210 xmlFreeURI(res);
2211 return(val);
2212 }
2213
2214 /**
2215 * xmlCanonicPath:
2216 * @param path the resource locator in a filesystem notation
2217 *
2218 * Constructs a canonic path from the specified path.
2219 *
2220 * Returns a new canonic path, or a duplicate of the path parameter if the
2221 * construction fails. The caller is responsible for freeing the memory occupied
2222 * by the returned string. If there is insufficient memory available, or the
2223 * argument is NULL, the function returns NULL.
2224 *
2225 * OOM:
2226 */
2227 #define IS_WINDOWS_PATH(p) \
2228 ((p != NULL) && \
2229 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2230 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2231 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2232 //ISSUE: Not finished reviewing for OOM handling / O.K.: 16.05.05
2233 XMLPUBFUNEXPORT xmlChar*
xmlCanonicPath(const xmlChar * path)2234 xmlCanonicPath(const xmlChar *path)
2235 {
2236 #if (defined(_WIN32)||defined(__SYMBIAN32__)) && !defined(__CYGWIN__)
2237 int len = 0;
2238 //int i = 0;
2239 xmlChar *p = NULL;
2240 #endif
2241 xmlChar *ret;
2242 xmlURIPtr uri;
2243
2244 // DO NOT REMOVE this check
2245 if (path == NULL)
2246 return(NULL);
2247
2248 if ((uri = xmlParseURI((const char *) path)) != NULL)
2249 {
2250 xmlFreeURI(uri);
2251 return xmlStrdup(path);
2252 }
2253 // It's should be OOM already!!! // ISSUE: Not finished work / OK: 16.05.05
2254 uri = xmlCreateURI();
2255 if (uri == NULL) {
2256 return(NULL);
2257 }
2258
2259 #if (defined(_WIN32)||defined(__SYMBIAN32__)) && !defined(__CYGWIN__)
2260 len = xmlStrlen(path);
2261 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2262 uri->scheme = (char*) xmlStrdup(BAD_CAST "file");
2263 uri->path = (char*) xmlMallocAtomic(len + 2);
2264 uri->path[0] = '/';
2265 p = (xmlChar*) uri->path + 1;
2266 strncpy((char*)p, (char*)path, len + 1);
2267 } else {
2268 uri->path = (char*) xmlStrdup(path);
2269 p = (xmlChar*) uri->path;
2270 }
2271 while (*p != '\0') {
2272 if (*p == '\\')
2273 *p = '/';
2274 p++;
2275 }
2276 #else
2277 uri->path = (char *) xmlStrdup((const xmlChar *) path);
2278 #endif
2279
2280 ret = xmlSaveUri(uri);
2281 xmlFreeURI(uri);
2282 return(ret);
2283 }
2284