1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rdf_uri.c - RDF URI interface
4  *
5  * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  *
8  * This package is Free Software and part of Redland http://librdf.org/
9  *
10  * It is licensed under the following three licenses as alternatives:
11  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12  *   2. GNU General Public License (GPL) V2 or any newer version
13  *   3. Apache License, V2.0 or any newer version
14  *
15  * You may not use this file except in compliance with at least one of
16  * the above three licenses.
17  *
18  * See LICENSE.html or LICENSE.txt at the top of this package for the
19  * complete terms and further detail along with the license texts for
20  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21  *
22  *
23  */
24 
25 
26 #ifdef HAVE_CONFIG_H
27 #include <rdf_config.h>
28 #endif
29 
30 #ifdef WIN32
31 #include <win32_rdf_config.h>
32 #endif
33 
34 #include <stdio.h>
35 #include <string.h>
36 #include <ctype.h> /* for isalnum */
37 #ifdef WITH_THREADS
38 #include <pthread.h>
39 #endif
40 #ifdef HAVE_STDLIB_H
41 #include <stdlib.h>
42 #endif
43 
44 #include <redland.h>
45 
46 
47 #ifndef STANDALONE
48 
49 /* class methods */
50 
51 
52 /**
53  * librdf_init_uri:
54  * @world: redland world object
55  *
56  * INTERNAL - Initialise the uri module.
57  *
58  **/
59 void
librdf_init_uri(librdf_world * world)60 librdf_init_uri(librdf_world *world)
61 {
62 }
63 
64 
65 
66 /**
67  * librdf_finish_uri:
68  * @world: redland world object
69  *
70  * INTERNAL - Terminate the uri module.
71  *
72  **/
73 void
librdf_finish_uri(librdf_world * world)74 librdf_finish_uri(librdf_world *world)
75 {
76 }
77 
78 
79 
80 /**
81  * librdf_new_uri2:
82  * @world: redland world object
83  * @uri_string: URI in string form
84  * @length: length of string
85  *
86  * Constructor - create a new #librdf_uri object from a counted URI string.
87  *
88  * A new URI is constructed from a copy of the string.  If the string
89  * is a NULL pointer or 0 length or empty (first byte is 0) then the
90  * result is NULL.
91  *
92  * Return value: a new #librdf_uri object or NULL on failure
93  **/
94 librdf_uri*
librdf_new_uri2(librdf_world * world,const unsigned char * uri_string,size_t length)95 librdf_new_uri2(librdf_world *world,
96                 const unsigned char *uri_string,
97                 size_t length)
98 {
99   return raptor_new_uri_from_counted_string(world->raptor_world_ptr,
100                                             uri_string, length);
101 }
102 
103 
104 /**
105  * librdf_new_uri:
106  * @world: redland world object
107  * @uri_string: URI in string form
108  *
109  * Constructor - create a new #librdf_uri object from a URI string.
110  *
111  * A new URI is constructed from a copy of the string.  If the
112  * string is a NULL pointer or empty (0 length) then the result is NULL.
113  *
114  * Return value: a new #librdf_uri object or NULL on failure
115  **/
116 librdf_uri*
librdf_new_uri(librdf_world * world,const unsigned char * uri_string)117 librdf_new_uri(librdf_world *world,
118                const unsigned char *uri_string)
119 {
120   librdf_world_open(world);
121 
122   if(!uri_string || !*uri_string)
123     return NULL;
124 
125   return librdf_new_uri2(world, uri_string, strlen((const char*)uri_string));
126 }
127 
128 
129 /**
130  * librdf_new_uri_from_uri:
131  * @old_uri: #librdf_uri object
132  *
133  * Copy constructor - create a new librdf_uri object from an existing librdf_uri object.
134  *
135  * Return value: a new #librdf_uri object or NULL on failure
136  **/
137 librdf_uri*
librdf_new_uri_from_uri(librdf_uri * old_uri)138 librdf_new_uri_from_uri (librdf_uri* old_uri)
139 {
140 
141   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(old_uri, librdf_uri, NULL);
142 
143   return raptor_uri_copy(old_uri);
144 }
145 
146 
147 /**
148  * librdf_new_uri_from_uri_local_name:
149  * @old_uri: #librdf_uri object
150  * @local_name: local name to append to URI
151  *
152  * Copy constructor - create a new librdf_uri object from an existing librdf_uri object and a local name.
153  *
154  * Return value: a new #librdf_uri object or NULL on failure
155  **/
156 librdf_uri*
librdf_new_uri_from_uri_local_name(librdf_uri * old_uri,const unsigned char * local_name)157 librdf_new_uri_from_uri_local_name (librdf_uri* old_uri,
158                                     const unsigned char *local_name)
159 {
160   return raptor_new_uri_from_uri_local_name(raptor_uri_get_world(old_uri),
161                                             old_uri, local_name);
162 }
163 
164 
165 /**
166  * librdf_new_uri_normalised_to_base:
167  * @uri_string: URI in string form
168  * @source_uri: source URI to remove
169  * @base_uri: base URI to add
170  *
171  * Constructor - create a new #librdf_uri object from a URI string stripped of the source URI, made relative to the base URI.
172  *
173  * Return value: a new #librdf_uri object or NULL on failure
174  **/
175 librdf_uri*
librdf_new_uri_normalised_to_base(const unsigned char * uri_string,librdf_uri * source_uri,librdf_uri * base_uri)176 librdf_new_uri_normalised_to_base(const unsigned char *uri_string,
177                                   librdf_uri* source_uri,
178                                   librdf_uri* base_uri)
179 {
180   size_t uri_string_len;
181   size_t len;
182   unsigned char *new_uri_string;
183   librdf_uri *new_uri;
184   unsigned char* source_uri_string;
185   size_t source_uri_string_length;
186   unsigned char* base_uri_string;
187   size_t base_uri_string_length;
188 
189 
190   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(source_uri, librdf_uri, NULL);
191   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(base_uri, librdf_uri, NULL);
192 
193   if(!uri_string)
194     return NULL;
195 
196   /* empty URI - easy, just make from base_uri */
197   if(!*uri_string && base_uri) {
198     return raptor_uri_copy(base_uri);
199   }
200 
201   source_uri_string = librdf_uri_as_counted_string(source_uri,
202                                                    &source_uri_string_length);
203   base_uri_string = librdf_uri_as_counted_string(base_uri,
204                                                  &base_uri_string_length);
205 
206   /* not a fragment, and no match - easy */
207   if(*uri_string != '#' &&
208      strncmp((const char*)uri_string, (const char*)source_uri_string,
209              source_uri_string_length)) {
210     raptor_world* rworld = raptor_uri_get_world(base_uri);
211     return raptor_new_uri(rworld, uri_string);
212   }
213 
214   /* darn - is a fragment or matches, is a prefix of the source URI */
215 
216   /* move uri_string pointer to first non-matching char
217    * unless a fragment, when all of the uri_string will
218    * be appended
219    */
220   if(*uri_string != '#')
221     uri_string += source_uri_string_length;
222 
223   /* size of remaining bytes to copy from uri_string */
224   uri_string_len = strlen((const char*)uri_string);
225 
226   /* total bytes */
227   len = uri_string_len + 1 + base_uri_string_length;
228 
229   new_uri_string = LIBRDF_MALLOC(unsigned char*, len);
230   if(!new_uri_string)
231     return NULL;
232   strncpy((char*)new_uri_string, (const char*)base_uri_string,
233           base_uri_string_length);
234   /* strcpy not strncpy since I want a \0 on the end */
235   strcpy((char*)new_uri_string + base_uri_string_length,
236          (const char*)uri_string);
237 
238   new_uri = raptor_new_uri(raptor_uri_get_world(source_uri), new_uri_string);
239   LIBRDF_FREE(char*, new_uri_string); /* always free this even on failure */
240 
241   return new_uri; /* new URI or NULL from librdf_new_uri failure */
242 }
243 
244 
245 
246 /**
247  * librdf_new_uri_relative_to_base:
248  * @base_uri: absolute base URI
249  * @uri_string: relative URI string
250  *
251  * Constructor - create a new #librdf_uri object from a URI string relative to a base URI.
252  *
253  * An empty uri_string or NULL is equivalent to
254  * librdf_new_uri_from_uri(base_uri)
255  *
256  * Return value: a new #librdf_uri object or NULL on failure
257  **/
258 librdf_uri*
librdf_new_uri_relative_to_base(librdf_uri * base_uri,const unsigned char * uri_string)259 librdf_new_uri_relative_to_base(librdf_uri* base_uri,
260                                 const unsigned char *uri_string)
261 {
262   return raptor_new_uri_relative_to_base(raptor_uri_get_world(base_uri),
263                                          base_uri,
264                                          uri_string);
265 }
266 
267 
268 /**
269  * librdf_new_uri_from_filename:
270  * @world: Redland #librdf_world object
271  * @filename: filename
272  *
273  * Constructor - create a new #librdf_uri object from a filename.
274  *
275  * Return value: a new #librdf_uri object or NULL on failure
276  **/
277 librdf_uri*
librdf_new_uri_from_filename(librdf_world * world,const char * filename)278 librdf_new_uri_from_filename(librdf_world* world, const char *filename) {
279   librdf_uri* new_uri;
280   unsigned char *uri_string;
281 
282   librdf_world_open(world);
283 
284   if(!filename)
285     return NULL;
286 
287   uri_string=raptor_uri_filename_to_uri_string(filename);
288   if(!uri_string)
289     return NULL;
290 
291   new_uri=librdf_new_uri(world, uri_string);
292   raptor_free_memory(uri_string);
293   return new_uri;
294 }
295 
296 
297 
298 /**
299  * librdf_free_uri:
300  * @uri: #librdf_uri object
301  *
302  * Destructor - destroy a #librdf_uri object.
303  *
304  **/
305 void
librdf_free_uri(librdf_uri * uri)306 librdf_free_uri(librdf_uri* uri)
307 {
308   if(!uri)
309     return;
310 
311   raptor_free_uri(uri);
312 }
313 
314 
315 /**
316  * librdf_uri_as_string:
317  * @uri: #librdf_uri object
318  *
319  * Get a pointer to the string representation of the URI.
320  *
321  * Returns a shared pointer to the URI string representation.
322  * Note: does not allocate a new string so the caller must not free it.
323  *
324  * Return value: string representation of URI
325  **/
326 unsigned char*
librdf_uri_as_string(librdf_uri * uri)327 librdf_uri_as_string (librdf_uri *uri)
328 {
329   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, librdf_uri, NULL);
330 
331   return raptor_uri_as_string(uri);
332 }
333 
334 
335 /**
336  * librdf_uri_as_counted_string:
337  * @uri: #librdf_uri object
338  * @len_p: pointer to location to store length
339  *
340  * Get a pointer to the string representation of the URI with length.
341  *
342  * Returns a shared pointer to the URI string representation.
343  * Note: does not allocate a new string so the caller must not free it.
344  *
345  * Return value: string representation of URI
346  **/
347 unsigned char*
librdf_uri_as_counted_string(librdf_uri * uri,size_t * len_p)348 librdf_uri_as_counted_string(librdf_uri *uri, size_t* len_p)
349 {
350   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, librdf_uri, NULL);
351 
352   return raptor_uri_as_counted_string(uri, len_p);
353 }
354 
355 
356 /**
357  * librdf_uri_get_digest:
358  * @world: #librdf_world object
359  * @uri: #librdf_uri object
360  *
361  * Get a digest for the URI.
362  *
363  * Generates a digest object for the URI.  The digest factory used is
364  * determined at class initialisation time by librdf_init_uri().
365  *
366  * Return value: new #librdf_digest object or NULL on failure.
367  **/
368 librdf_digest*
librdf_uri_get_digest(librdf_world * world,librdf_uri * uri)369 librdf_uri_get_digest(librdf_world* world, librdf_uri* uri)
370 {
371   librdf_digest* d;
372   unsigned char *str;
373   size_t len;
374 
375   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, librdf_uri, NULL);
376 
377   d = librdf_new_digest_from_factory(world, world->digest_factory);
378   if(!d)
379     return NULL;
380 
381   str = librdf_uri_as_counted_string(uri, &len);
382 
383   librdf_digest_update(d, str, len);
384   librdf_digest_final(d);
385 
386   return d;
387 }
388 
389 
390 /**
391  * librdf_uri_print:
392  * @uri: #librdf_uri object
393  * @fh: file handle
394  *
395  * Print the URI to the given file handle.
396  *
397  **/
398 void
librdf_uri_print(librdf_uri * uri,FILE * fh)399 librdf_uri_print (librdf_uri* uri, FILE *fh)
400 {
401   LIBRDF_ASSERT_OBJECT_POINTER_RETURN(uri, librdf_uri);
402 
403   fputs((const char*)librdf_uri_as_string(uri), fh);
404 }
405 
406 
407 /**
408  * librdf_uri_to_string:
409  * @uri: #librdf_uri object
410  *
411  * Format the URI as a string.
412  *
413  * Note: this method allocates a new string since this is a _to_ method
414  * and the caller must free the resulting memory.
415  *
416  * Return value: string representation of the URI or NULL on failure
417  **/
418 unsigned char*
librdf_uri_to_string(librdf_uri * uri)419 librdf_uri_to_string (librdf_uri* uri)
420 {
421   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, librdf_uri, NULL);
422 
423   return librdf_uri_to_counted_string(uri, NULL);
424 }
425 
426 
427 /**
428  * librdf_uri_to_counted_string:
429  * @uri: #librdf_uri object
430  * @len_p: pointer to location to store length
431  *
432  * Format the URI as a counted string.
433  *
434  * Note: this method allocates a new string since this is a _to_ method
435  * and the caller must free the resulting memory.
436  *
437  * Return value: string representation of the URI or NULL on failure
438  **/
439 unsigned char*
librdf_uri_to_counted_string(librdf_uri * uri,size_t * len_p)440 librdf_uri_to_counted_string (librdf_uri* uri, size_t* len_p)
441 {
442   return raptor_uri_to_counted_string(uri, len_p);
443 }
444 
445 
446 /**
447  * librdf_uri_equals:
448  * @first_uri: #librdf_uri object 1
449  * @second_uri: #librdf_uri object 2
450  *
451  * Compare two librdf_uri objects for equality.
452  *
453  * Return value: non 0 if the objects are equal
454  **/
455 int
librdf_uri_equals(librdf_uri * first_uri,librdf_uri * second_uri)456 librdf_uri_equals(librdf_uri* first_uri, librdf_uri* second_uri)
457 {
458   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(first_uri, librdf_uri, 0);
459   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(second_uri, librdf_uri, 0);
460 
461   return raptor_uri_equals(first_uri, second_uri);
462 }
463 
464 
465 /**
466  * librdf_uri_is_file_uri:
467  * @uri: #librdf_uri object
468  *
469  * Test if a URI points to a filename.
470  *
471  * Return value: Non zero if the URI points to a file
472  **/
473 int
librdf_uri_is_file_uri(librdf_uri * uri)474 librdf_uri_is_file_uri(librdf_uri* uri)
475 {
476   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, librdf_uri, 1);
477 
478   return raptor_uri_uri_string_is_file_uri(librdf_uri_as_string(uri));
479 }
480 
481 
482 /**
483  * librdf_uri_to_filename:
484  * @uri: #librdf_uri object
485  *
486  * Return pointer to filename of URI.
487  *
488  * Returns a pointer to a newly allocated buffer that
489  * the caller must free.  This will fail if the URI
490  * is not a file: URI.  This can be checked with #librdf_uri_is_file_uri
491  *
492  * Return value: pointer to filename or NULL on failure
493  **/
494 const char*
librdf_uri_to_filename(librdf_uri * uri)495 librdf_uri_to_filename(librdf_uri* uri)
496 {
497   LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, librdf_uri, NULL);
498 
499   return raptor_uri_uri_string_to_filename(librdf_uri_as_string(uri));
500 
501 }
502 
503 
504 /**
505  * librdf_uri_compare:
506  * @uri1: #librdf_uri object 1 or NULL
507  * @uri2: #librdf_uri object 2 or NULL
508  *
509  * Compare two librdf_uri objects lexicographically.
510  *
511  * A NULL URI is always less than (never equal to) a non-NULL URI.
512  *
513  * Return value: <0 if @uri1 is less than @uri2, 0 if equal, >0 if @uri1 is greater than @uri2
514  **/
515 int
librdf_uri_compare(librdf_uri * uri1,librdf_uri * uri2)516 librdf_uri_compare(librdf_uri* uri1, librdf_uri* uri2)
517 {
518   return raptor_uri_compare(uri1, uri2);
519 }
520 
521 
522 #endif
523 
524 
525 /* TEST CODE */
526 
527 
528 #ifdef STANDALONE
529 
530 /* one more prototype */
531 int main(int argc, char *argv[]);
532 
533 
534 int
main(int argc,char * argv[])535 main(int argc, char *argv[])
536 {
537   const unsigned char *hp_string=(const unsigned char*)"http://purl.org/net/dajobe/";
538   librdf_uri *uri1, *uri2, *uri3, *uri4, *uri5, *uri6, *uri7, *uri8, *uri9;
539   librdf_digest *d;
540   const char *program=librdf_basename((const char*)argv[0]);
541   const char *file_string="/big/long/directory/file";
542   const unsigned char *file_uri_string=(const unsigned char*)"file:///big/long/directory/file";
543   const unsigned char *uri_string=(const unsigned char*)"http://example.com/big/long/directory/blah#frag";
544   const unsigned char *relative_uri_string1=(const unsigned char*)"#foo";
545   const unsigned char *relative_uri_string2=(const unsigned char*)"bar";
546   librdf_world *world;
547 
548   world=librdf_new_world();
549   librdf_world_open(world);
550 
551   fprintf(stderr, "%s: Creating new URI from string\n", program);
552   uri1=librdf_new_uri(world, hp_string);
553   if(!uri1) {
554     fprintf(stderr, "%s: Failed to create URI from string '%s'\n", program,
555 	    hp_string);
556     return(1);
557   }
558 
559   fprintf(stderr, "%s: Home page URI is ", program);
560   librdf_uri_print(uri1, stderr);
561   fputs("\n", stderr);
562 
563   fprintf(stderr, "%s: Creating URI from URI\n", program);
564   uri2=librdf_new_uri_from_uri(uri1);
565   if(!uri2) {
566     fprintf(stderr, "%s: Failed to create new URI from old one\n", program);
567     return(1);
568   }
569 
570   fprintf(stderr, "%s: New URI is ", program);
571   librdf_uri_print(uri2, stderr);
572   fputs("\n", stderr);
573 
574 
575   fprintf(stderr, "%s: Getting digest for URI\n", program);
576   d = librdf_uri_get_digest(world, uri2);
577   if(!d) {
578     fprintf(stderr, "%s: Failed to get digest for URI %s\n", program,
579 	    librdf_uri_as_string(uri2));
580     return(1);
581   }
582   fprintf(stderr, "%s: Digest is: ", program);
583   librdf_digest_print(d, stderr);
584   fputs("\n", stderr);
585   librdf_free_digest(d);
586 
587   uri3=librdf_new_uri(world, (const unsigned char*)"file:/big/long/directory/");
588   uri4=librdf_new_uri(world, (const unsigned char*)"http://somewhere/dir/");
589   fprintf(stderr, "%s: Source URI is ", program);
590   librdf_uri_print(uri3, stderr);
591   fputs("\n", stderr);
592   fprintf(stderr, "%s: Base URI is ", program);
593   librdf_uri_print(uri4, stderr);
594   fputs("\n", stderr);
595   fprintf(stderr, "%s: URI string is '%s'\n", program, uri_string);
596 
597   uri5=librdf_new_uri_normalised_to_base(uri_string, uri3, uri4);
598   fprintf(stderr, "%s: Normalised URI is ", program);
599   librdf_uri_print(uri5, stderr);
600   fputs("\n", stderr);
601 
602 
603   uri6=librdf_new_uri_relative_to_base(uri5, relative_uri_string1);
604   fprintf(stderr, "%s: URI + Relative URI %s gives ", program,
605           relative_uri_string1);
606   librdf_uri_print(uri6, stderr);
607   fputs("\n", stderr);
608 
609   uri7=librdf_new_uri_relative_to_base(uri5, relative_uri_string2);
610   fprintf(stderr, "%s: URI + Relative URI %s gives ", program,
611           relative_uri_string2);
612   librdf_uri_print(uri7, stderr);
613   fputs("\n", stderr);
614 
615   uri8=librdf_new_uri_from_filename(world, file_string);
616   uri9=librdf_new_uri(world, file_uri_string);
617   if(!librdf_uri_equals(uri8, uri9)) {
618     fprintf(stderr, "%s: URI string from filename %s returned %s, expected %s\n", program, file_string, librdf_uri_as_string(uri8), file_uri_string);
619     return(1);
620   }
621 
622   fprintf(stderr, "%s: Freeing URIs\n", program);
623   librdf_free_uri(uri1);
624   librdf_free_uri(uri2);
625   librdf_free_uri(uri3);
626   librdf_free_uri(uri4);
627   librdf_free_uri(uri5);
628   librdf_free_uri(uri6);
629   librdf_free_uri(uri7);
630   librdf_free_uri(uri8);
631   librdf_free_uri(uri9);
632 
633   librdf_free_world(world);
634 
635   /* keep gcc -Wall happy */
636   return(0);
637 }
638 
639 #endif
640