1 /*
2 * The contents of this file are subject to the Mozilla Public
3 * License Version 1.1 (the "License"); you may not use this file
4 * except in compliance with the License. You may obtain a copy of
5 * the License at http://www.mozilla.org/MPL/
6 *
7 * Software distributed under the License is distributed on an "AS
8 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9 * implied. See the License for the specific language governing
10 * rights and limitations under the License.
11 *
12 * The Original Code is the Sablotron XSLT Processor.
13 *
14 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
15 * Portions created by Ginger Alliance are Copyright (C) 2000-2002
16 * Ginger Alliance Ltd. All Rights Reserved.
17 *
18 * Contributor(s):
19 *
20 * Alternatively, the contents of this file may be used under the
21 * terms of the GNU General Public License Version 2 or later (the
22 * "GPL"), in which case the provisions of the GPL are applicable
23 * instead of those above. If you wish to allow use of your
24 * version of this file only under the terms of the GPL and not to
25 * allow others to use your version of this file under the MPL,
26 * indicate your decision by deleting the provisions above and
27 * replace them with the notice and other provisions required by
28 * the GPL. If you do not delete the provisions above, a recipient
29 * may use your version of this file under either the MPL or the
30 * GPL.
31 */
32
33 /*****************************************************************
34 uri.cpp
35 *****************************************************************/
36
37 #include "uri.h"
38 #include <string.h>
39 #include "proc.h"
40 #include "platform.h"
41
42 // GP: clean
43
44 /*****************************************************************
45
46 global functions
47
48 *****************************************************************/
49
// Leaves the enclosing void function unless CONDITION holds.
#define RF(CONDITION) {if (!(CONDITION)) return;}

// definition of names for various URI-reference parts
// (indices into a FiveStr)
#define U_SCHEME 0
#define U_AUTH 1
#define U_PATH 2
#define U_QUERY 3
#define U_FRAG 4

// definition of slashes in path names: both '/' and '\' separate segments
#define slashes "/\\"
// The argument is parenthesized so that compound expressions (e.g. a
// conditional expression) are compared as a whole. Note that it is
// evaluated twice.
#define isSlash(c) ((c) == '/' || (c) == '\\')

// special name reporting under windows: "/C:/dir" is reported as "C:/dir"
#ifdef __WIN_TOOLS
// (name)[1] is tested first so that a one-character name such as "/" is
// never read past its terminator
#define winName(name) \
    (((name)[0] == '/' && (name)[1] && (name)[2] == ':') ? (name) + 1 : (name))
#else
#define winName(name) (name)
#endif
69
70 /*****************************************************************
71 uriHasAuthority
72
73 decides, whether given schema should contain the authority
74 *****************************************************************/
75
uriHasAuthority(Str & scheme)76 Bool uriHasAuthority(Str & scheme)
77 {
78 return (scheme == (const char*)"file");
79 }
80
81 /*****************************************************************
82 splitBy
83
84 splits a given string into two parts divided by the first occurrence
85 of a delimiter from a given set. If no delimiter is found, returns FALSE
86 and leaves 'string' as is; otherwise shifts 'string' to the character
87 following the delimiter.
88 ARGS:
89 string the asciiz string to be split
90 delims the asciiz set of delimiters (all of them ASCII chars)
91 part1 first of the two parts
92 RETURN:
93 string shifted to the other part (past the delimiter)
94 . the delimiter found (or 0)
95 *****************************************************************/
96
splitBy(const char * & string,const char * delims,Str & part1)97 char splitBy(const char *&string, const char *delims, Str &part1)
98 {
99 char c;
100 int firstLen = strcspn(string, delims);
101 part1.nset(string, firstLen);
102 if (!!(c = string[firstLen]))
103 string += firstLen + 1;
104 return c;
105 }
106
107 typedef Str FiveStr[5];
108
splitURI(const char * uri,FiveStr & parts)109 void splitURI(const char *uri, FiveStr &parts)
110 {
111 const char *rest;
112 char c;
113 for (int i = 0; i < 5; i++)
114 parts[i].empty();
115 RF( uri && *uri );
116 // extract the scheme part of the URI
117 if (!splitBy(rest = uri, ":", parts[U_SCHEME]))
118 parts[U_SCHEME].empty();
119 // if "//" follows, extract the authority part
120 c = 'A'; // marks the absence of auth
121 if (isSlash(*rest) && isSlash(rest[1]))
122 RF( c = splitBy(rest += 2, slashes"?#", parts[U_AUTH]) );
123 if (isSlash(c) || c == 'A')
124 // extract the path
125 RF( c = splitBy(rest -= (isSlash(c)), "?#", parts[U_PATH]) );
126 //query and fragment
127 if (c == '?')
128 // extract the query
129 RF( c = splitBy(rest, "#", parts[U_QUERY]) );
130 // copy the fragment
131 parts[U_FRAG] = (char *) rest;
132 };
133
joinURI(DStr & joined,FiveStr & parts,Bool schemeToo)134 void joinURI(DStr &joined, FiveStr &parts, Bool schemeToo)
135 {
136 joined.empty();
137 if (schemeToo && !parts[U_SCHEME].isEmpty())
138 joined = parts[U_SCHEME] + ":";
139 //if (!parts[U_AUTH].isEmpty())
140 if (uriHasAuthority(parts[U_SCHEME]))
141 joined += Str("//") + parts[U_AUTH]; // add authority
142 joined += parts[U_PATH]; // add path
143 if (!parts[U_QUERY].isEmpty()) // add query
144 joined += Str("?") + parts[U_QUERY];
145 if (!parts[U_FRAG].isEmpty()) // add fragment
146 joined += Str("#") + parts[U_FRAG];
147 }
148
149 /*****************************************************************
150 schemeToURI_()
151
152 converts the scheme given as Str to one of the URI_... constants.
153 If the scheme is neither "file" or "arg" then URI_EXTENSION is
154 simply returned.
155 *****************************************************************/
156
schemeToURI_(Sit S,Str & scheme)157 URIScheme schemeToURI_(Sit S, Str& scheme)
158 {
159 if (scheme.eqNoCase("file") && !S.hasFlag(SAB_FILES_TO_HANDLER))
160 return URI_FILE;
161 else
162 {
163 if (scheme.eqNoCase("arg"))
164 return URI_ARG;
165 else
166 return URI_EXTENSION;
167 }
168 }
169
170
171 /*****************************************************************
172 cutLast()
173
174 truncates a path after 'howmany'-th slash from the right (1-based).
175 If there are fewer slashes, sets path to empty string and returns
176 FALSE, otherwise returns TRUE.
177 ARGS
178 path the path to be truncated
179 howmany # of slashes that disappear in truncation, MINUS 1
180 RETURNS
181 . TRUE iff that many slashes were found
182 path the truncated path
183 *****************************************************************/
184
cutLast(Str & path,int howmany)185 Bool cutLast(Str& path, int howmany)
186 {
187 Str temp = path;
188 char *p = (char*) temp;
189 int slashCount = 0,
190 i;
191 for (i = temp.length() - 1; i >= 0; i--)
192 {
193 if (isSlash(p[i]))
194 slashCount++;
195 if (slashCount == howmany)
196 break;
197 };
198 if (i >= 0)
199 path.nset(p, i+1);
200 else
201 path.empty();
202 return (Bool)(i >= 0);
203 };
204
205 /*****************************************************************
206 joinPaths()
207
208 merges a relative path with a base path
209 ARGS
210 relPath the relative path. The result is returned here.
211 basePath the base path (always absolute)
212 RETURNS
213 relPath the newly constructed absolute path
214 *****************************************************************/
215
segP(Str & s,int oneOrTwo)216 Bool segP(Str &s, int oneOrTwo)
217 {
218 return (Bool) !strcmp((char *) s, (oneOrTwo == 1 ? "." : ".."));
219 }
220
joinPaths(Str & relPath,const Str & basePath)221 void joinPaths(Str& relPath, const Str& basePath)
222 {
223 Str segment;
224 DStr absPath;
225 // append the relPath to all-but-the-last-segment-of-basePath
226
227 Bool endSlash = cutLast(absPath = basePath, 1),
228 lastSeg;
229 DStr result = absPath + (endSlash? "" : "/") + relPath;
230
231 // throw out all '.' from the path
232 const char *p = (const char*) result;
233 absPath.empty();
234 while(splitBy(p, slashes, segment))
235 {
236 if (!segP(segment, 1))
237 absPath += segment + "/";
238 }
239 if (!segP(segment, 1))
240 absPath += segment;
241
242 // throw out all "something/.." from the path
243 p = (char*) absPath;
244 int depth = 0;
245 result.empty();
246 do
247 {
248 lastSeg = (Bool) !splitBy(p, slashes, segment);
249 if (!segP(segment, 2))
250 {
251 result += segment + (lastSeg ? "" : "/");
252 depth++;
253 }
254 else
255 {
256 if (depth > 1)
257 {
258 cutLast(result, 2);
259 depth--;
260 }
261 else
262 result += segment + (lastSeg ? "" : "/");
263 };
264 }
265 while(!lastSeg);
266 relPath = result;
267 }
268
269
makeAbsoluteURI(Sit S,const char * uri,const char * base,Str & absolute)270 URIScheme makeAbsoluteURI(Sit S, const char* uri,
271 const char* base, Str& absolute)
272 {
273 FiveStr
274 u_parts,
275 b_parts;
276 Bool
277 u_defined[5],
278 u_any = FALSE;
279 Str scheme;
280
281 // first, break up the URIs into their 5 components
282 splitURI(uri, u_parts);
283 splitURI(base, b_parts);
284
285 // set u_defined[i] to TRUE if the i-th uri component is nonvoid
286 for (int i = 0; i < 5; i++)
287 u_any = (Bool) ((u_defined[i] = (Bool) !u_parts[i].isEmpty()) || u_any);
288
289 if (!u_any) // all components empty: the reference is to the current document
290 {
291 splitURI(base,u_parts);
292 u_parts[U_QUERY].empty(); // query and fragment are NOT inherited from base
293 u_parts[U_FRAG].empty();
294 }
295 else // not all components are empty
296 {
297 if (!u_defined[U_SCHEME]) // undefined scheme
298 {
299 u_parts[U_SCHEME] = b_parts[U_SCHEME]; // inherit scheme from base
300 if (!u_defined[U_AUTH]) // undefined authority
301 {
302 u_parts[U_AUTH] = b_parts[U_AUTH]; // inherit authority from base
303 if (!isSlash(u_parts[U_PATH][0])) // path is relative
304 joinPaths(u_parts[U_PATH], b_parts[U_PATH]); // append path to base path
305 // query and fragment stay as they are in 'uri'
306 }
307 }
308 else
309 {
310 scheme = u_parts[U_SCHEME];
311
312 URIScheme uri_scheme = schemeToURI_(S, scheme);
313 if (uri_scheme == URI_EXTENSION)
314 {
315 absolute = uri;
316 return URI_EXTENSION;
317 }
318 // scheme defined, check for paths not starting with '/'
319 if (!u_defined[U_AUTH] && !isSlash(u_parts[U_PATH][0]))
320 u_parts[U_PATH] = Str("/") + u_parts[U_PATH];
321 }
322 }
323 DStr joined = absolute;
324 joinURI(joined, u_parts, FALSE); // join all components into a URI for return (no scheme)
325
326 scheme = u_parts[U_SCHEME];
327 absolute = (scheme + ":") + joined;
328 return schemeToURI_(S, scheme);
329 }
330
331
332 // URIScheme makeAbsoluteURI(uri, base, absolute)
333 //
334 // Merges a (possibly relative) URI reference with a base URI, setting
335 // 'absolute' to the result.
336 //
337
338 // URIScheme makeAbsoluteURI(Sit S, const char* uri,
339 // const char* base, Str& absolute)
340 // {
341 // return makeAbsoluteURI2(S, uri, base, absolute);
342 // }
343
344
uri2SchemePath(Sit S,const char * absolute,Str & scheme,Str & rest)345 URIScheme uri2SchemePath(Sit S, const char *absolute, Str& scheme, Str& rest)
346 {
347 Bool found = (Bool) !!splitBy(absolute, ":", scheme);
348 sabassert(found);
349 rest = (char*) absolute;
350 /*
351 * if (isSlash(*absolute) && isSlash(absolute[1]))
352 * rest = (char*) absolute + 2;
353 * else
354 * rest = (char*) absolute;
355 */
356 return schemeToURI_(S, scheme);
357 }
358
359
360 /*****************************************************************
361 DataLine
362
363 is a class that holds the machinery needed to retrieve data from
364 a given URI. There are two internally supported URI schemes:
365 file (the plain "file://...")
366 arg (for access to named memory blocks passed to Sablotron)
367
368 Other schemes are passed to the extending scheme handler (if
369 one has been registered). This way, requests such as http:...
370 can be processed.
371
372 The life cycle of a DataLine:
373 Upon construction, no URI is attached yet.
374 Call open() to associate a URI.
375 Repeatedly call save() or get() to retrieve data.
376 Call close() to close the resource.
377 Call the destructor.
378
379 The 'write' data line with the scheme of 'arg' will need to be
380 accessible to the user even after the Processor object is destroyed;
381 it is then freed by 'SablotFreeBuffer'.
382 *****************************************************************/
383
384 /*****************************************************************
385 DataLine::DataLine()
386
387 This constructor just sets everything to zeroes and such.
388 *****************************************************************/
389
DataLine()390 DataLine::DataLine()
391 {
392 mode = DLMODE_NONE;
393 scheme = URI_NONE;
394 f = NULL;
395 buffer = NULL;
396 outBuf = NULL;
397 bufCurr = 0;
398 fileIsStd = FALSE;
399 utf16Encoded = FALSE;
400 handler = NULL;
401 handlerUD = NULL;
402 handle = 0;
403 gotWholeDocument = FALSE;
404 }
405
406 /*****************************************************************
407 DataLine::~DataLine()
408
409 The destructor asserts that the data line had been closed.
410 *****************************************************************/
411
~DataLine()412 DataLine::~DataLine()
413 {
414 // removing the asserts (can be killed anytime due to error)
415 // assert(mode == DLMODE_CLOSED || mode == DLMODE_NONE);
416 // assert(!f);
417 // if there is an outBuf, delete it now
418 if (outBuf)
419 delete outBuf;
420 }
421
422 /*****************************************************************
423 DataLine::open()
424
425 Opens the data line for a given URI and access mode. Actual
426 data transfer is only done on subsequent get() or save() calls.
427 open() tries to call the extending scheme handler if it cannot
428 handle a request itself.
429
430 ARGS
431 _uri the URI identifier for the resource, including the
432 scheme (e.g. "file:///x.xml")
433 argList_ the named arguments searched when an 'arg:' URI is opened for reading
434 ignoreErr if TRUE, failures are reported as warnings instead of errors
435 _mode the access mode (DLMODE_READ, DLMODE_WRITE)
436 *****************************************************************/
437
438 #define specErr1(S, code, arg) \
439 {if (ignoreErr) {Warn1(S,code,arg); return NOT_OK;} else Err1(S,code,arg);}
440
open(Sit S,const char * _uri,DLAccessMode _mode,StrStrList * argList_,Bool ignoreErr)441 eFlag DataLine::open(Sit S, const char *_uri, DLAccessMode _mode,
442 StrStrList* argList_, Bool ignoreErr /* = FALSE */)
443 {
444 sabassert(mode == DLMODE_NONE); // the buffer must not be open yet
445 // combine _uri and _baseUri into one
446 Str strScheme, strPath;
447 scheme = uri2SchemePath(S, _uri, strScheme, strPath);
448 char *name = (char*) strPath;
449
450 // mode set in the end
451 fullUri = (char*)_uri;
452
453 switch(scheme)
454 {
455 case URI_FILE:
456 {
457 if (name[0] == '/' && name[1] == '/')
458 name += 2; // skipping the "//" in front
459 // try to open the file
460 #ifdef _MSC_VER
461 if (!(f = stdopen(name,_mode == DLMODE_WRITE ? "wb" : "rb")))
462 #else
463 if (!(f = stdopen(name,_mode == DLMODE_WRITE ? "w" : "r")))
464 #endif
465 specErr1(S, E_FILE_OPEN, winName(name));
466 // set fileIsStd if filename is "stdin", "stdout" or "stderr"
467 fileIsStd = isstd(name);
468 }; break;
469 case URI_ARG:
470 {
471 // if opening for read access, get the pointer to the argument contents
472 // plus some extra information
473 if (_mode == DLMODE_READ)
474 {
475 Str *value = NULL;
476 if (argList_)
477 value = argList_ -> find(name);
478 if (!value)
479 specErr1(S, E1_ARG_NOT_FOUND, name);
480 buffer = (char*)*value;
481 }
482 // if opening for write access, just allocate a new dynamic block
483 else
484 outBuf = new DynBlock;
485 }; break;
486 default:
487 {
488 // try the extending scheme handler
489 // ask the handler address from the Processor
490 Processor *proc = S.getProcessor();
491 if (proc)
492 handler = proc->getSchemeHandler(&handlerUD);
493 else
494 handler = NULL;
495 // if there is no handler, report unsupported scheme
496 if (!handler)
497 specErr1(S, E1_UNSUPPORTED_SCHEME, strScheme);
498 // try the fast way
499 int count = 0;
500 buffer = NULL;
501 if (_mode == DLMODE_READ && handler -> getAll)
502 handler -> getAll(handlerUD, proc,
503 strScheme, name, &buffer, &count);
504 if (buffer && (count != -1))
505 {
506 gotWholeDocument = TRUE;
507 bufCurr = 0;
508 }
509 else
510 {
511 // call the handler's open() function, obtaining a handle
512 switch(handler -> open(handlerUD, proc,
513 strScheme, name, &handle))
514 {
515 case SH_ERR_UNSUPPORTED_SCHEME: // scheme not supported
516 specErr1(S, E1_UNSUPPORTED_SCHEME, strScheme);
517 case SH_ERR_NOT_OK:
518 specErr1(S, E1_URI_OPEN, strScheme + ":" + strPath);
519 };
520 }
521 };
522 };
523 // open successfully completed. Set the new mode.
524 mode = _mode;
525 return OK;
526 }
527
528 /*****************************************************************
529 DataLine::close()
530
531 closes the resource attached to this data line.
532 *****************************************************************/
close(Sit S)533 eFlag DataLine::close(Sit S)
534 {
535 sabassert(mode != DLMODE_NONE);
536 switch(scheme)
537 {
538 case URI_FILE:
539 {
540 sabassert(f);
541 if (!fileIsStd)
542 {
543 if (fclose(f))
544 Err1(S, E1_URI_CLOSE, fullUri);
545 };
546 f = NULL;
547 }; break;
548 case URI_ARG:
549 break;
550 case URI_EXTENSION:
551 {
552 if (gotWholeDocument)
553 {
554 NZ(handler) -> freeMemory(handlerUD, S.getProcessor(), buffer);
555 }
556 else
557 {
558 if(NZ(handler) -> close(handlerUD, S.getProcessor(), handle))
559 Err1(S, E1_URI_CLOSE, fullUri);
560 }
561 }; break;
562 };
563 mode = DLMODE_CLOSED;
564 return OK;
565 }
566
567 /*****************************************************************
568 save()
569
570 saves an UTF-8 string pointed to by data to the data line.
571 This is the place to perform any recoding, escaping and other operations
572 that require char-by-char scanning of the string.
573 *****************************************************************/
574
// Returns the length IN BYTES of a string made of 2-byte units, including
// the 2-byte zero terminator (so an empty string gives 2).
// The units are examined byte by byte: 'p' need not be aligned for a
// 16-bit access and is never cast to a wider pointer type.
int my_wcslen(const char *p)
{
    int len = 2;
    while (p[0] != 0 || p[1] != 0)
    {
        p += 2;
        len += 2;
    }
    return len;
}
581
save(Sit S,const char * data,int length)582 eFlag DataLine::save(Sit S, const char *data, int length)
583 {
584 sabassert(mode == DLMODE_WRITE); // assume the file open for writing
585 // int length = utf16Encoded ? my_wcslen(data) : strlen(data);
586 switch (scheme) // choose the output procedure
587 {
588 case URI_FILE: // file: scheme
589 {
590 sabassert(f); // the file must be open
591 // fputs(data, f);
592 fwrite(data, 1, length, f);
593 }; break;
594 case URI_ARG: // arg: scheme
595 {
596 sabassert(outBuf); // the output buffer must exist
597 outBuf -> nadd(data, length);
598 }; break;
599 case URI_EXTENSION: // external handler
600 {
601 int actual = length;
602 if( NZ(handler) -> put(handlerUD, S.getProcessor(), handle, data, &actual) )
603 Err1(S, E1_URI_WRITE, fullUri);
604 };
605 }
606 return OK;
607 }
608
609 /*................................................................
610 pointsAtEnd()
611
612 DESCRIPTION
613 a macro that returns nonzero if the given char* points at a
614 string terminator
615
616 ARGS
617 p the pointer
618 is16 TRUE iff the string is UTF-16
619 ................................................................*/
620
// The two bytes of a UTF-16 terminator are tested one by one, so 'p' does
// not have to be aligned for a 16-bit access.
#define pointsAtEnd(p, is16) ((is16) ? (!(p)[0] && !(p)[1]) : (!*(p)))
622
623 /*****************************************************************
624 get()
625
626 - retrieves at most 'maxcount' bytes into buffer 'dest'.
627 - input should be NUL-terminated
628 - if a terminating 0 is reached, copying stops
629 *****************************************************************/
630
get(Sit S,char * dest,int maxcount)631 int DataLine::get(Sit S, char *dest,int maxcount)
632 {
633 int result = 0;
634 sabassert(mode == DLMODE_READ); // assume the file open for reading
635 switch(scheme)
636 {
637 case URI_FILE:
638 {
639 sabassert(f); // the file must be open
640 result = fread(dest,1,maxcount,f);
641 // return the number of bytes read
642 }; break;
643 case URI_ARG:
644 {
645 sabassert(buffer); // the buffer must exist
646 // do a 'strncpy' that shifts dest and bufCurr;
647 // i counts the number of bytes transferred
648 char * copyChar = dest;
649 int i;
650 for (i = 0;
651 (!pointsAtEnd(buffer + bufCurr, utf16Encoded)) && (i < maxcount);
652 i++)
653 {
654 *(copyChar++) = buffer[bufCurr++];
655 };
656 result = i;
657 }; break;
658 case URI_EXTENSION: // external handler
659 {
660 if (gotWholeDocument)
661 {
662 // ugly hack: copied the following from above
663 sabassert(buffer); // the buffer must exist
664 char * copyChar = dest;
665 int i;
666 for (i = 0;
667 (!pointsAtEnd(buffer + bufCurr, utf16Encoded)) && (i < maxcount);
668 i++)
669 {
670 *(copyChar++) = buffer[bufCurr++];
671 };
672 result = i;
673 }
674 else
675 {
676 int actual = maxcount;
677 if( NZ(handler) -> get(handlerUD, S.getProcessor(), handle, dest, &actual) )
678 {
679 S.message( MT_ERROR, E1_URI_READ, fullUri, "" );
680 return -1;
681 }
682 result = actual;
683 }
684 }; break;
685 }
686 // need to NUL terminate in order to prevent C string
687 // functions running off the end of the buffer
688
689 // assignment assumes that the passed in dest is allocated
690 // one bigger than maxcount
691 dest[result] = '\0';
692 return result; // return the number of bytes read
693 }
694
695 /*****************************************************************
696 getOutBuffer()
697
698 returns the pointer to the output buffer which may be used after
699 all processing is finished (remains allocated along with the
700 whole DataLine)
701 *****************************************************************/
702
getOutBuffer()703 DynBlock* DataLine::getOutBuffer()
704 {
705 // check that the output buffer exists and that we're open for write
706 sabassert(mode == DLMODE_WRITE && scheme == URI_ARG);
707 return NZ(outBuf); // -> getPointer();
708 }
709
setURIAndClose(Sit S,const char * _uri)710 eFlag DataLine::setURIAndClose(Sit S, const char *_uri)
711 {
712 sabassert( mode == DLMODE_NONE );
713 mode = DLMODE_CLOSED;
714 scheme = URI_ARG;
715 fullUri = _uri;
716 return OK;
717 }
718
report(Sit S,MsgType type,MsgCode code,const Str & arg1,const Str & arg2)719 void DataLine::report(Sit S, MsgType type, MsgCode code, const Str& arg1, const Str& arg2)
720 {
721 S.message(type, code, arg1, arg2);
722 }
723
724