1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_www.c - Raptor WWW retrieval core
4 *
5 * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdarg.h>
37 #ifdef HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40 #ifdef HAVE_SYS_STAT_H
41 #include <sys/stat.h>
42 #endif
43
44 /* Raptor includes */
45 #include "raptor.h"
46 #include "raptor_internal.h"
47
48
49 static int raptor_www_init_common(int skip_www_init_finish, int *www_initialized);
50 static void raptor_www_finish_common(int skip_www_init_finish);
51 static int raptor_www_file_fetch(raptor_www* www);
52
53
54
55 #ifndef RAPTOR_DISABLE_V1
56 /* should raptor_www do initializing and cleanup of the WWW library */
57 static int raptor_www_skip_www_init_finish=0;
58 static int raptor_www_initialized=0;
59 #endif
60
61
62 #ifndef RAPTOR_DISABLE_V1
63 /**
64 * raptor_www_init:
65 *
66 * Initialise the WWW class.
67 *
68 * Must be called before creating any #raptor_www object.
69 *
70 * See also: raptor_www_init_v2()
71 **/
72 void
raptor_www_init(void)73 raptor_www_init(void)
74 {
75 raptor_www_init_common(raptor_www_skip_www_init_finish, &raptor_www_initialized);
76 }
77 #endif
78
79
80 /**
81 * raptor_www_init_v2:
82 * @world: raptor_world object
83 *
84 * Initialise the WWW class.
85 *
86 * Must be called before creating any #raptor_www object.
87 *
88 * See also: raptor_www_init()
89 *
90 * Return value: non-0 on failure
91 **/
92 int
raptor_www_init_v2(raptor_world * world)93 raptor_www_init_v2(raptor_world* world)
94 {
95 #ifndef RAPTOR_DISABLE_V1
96 /* support legacy v1 raptor_www_no_www_library_init_finish() */
97 if(raptor_www_skip_www_init_finish)
98 world->www_skip_www_init_finish = raptor_www_skip_www_init_finish;
99
100 /* skip init if already inited with legacy init() */
101 if(raptor_www_initialized)
102 return 0;
103 #endif
104
105 return raptor_www_init_common(world->www_skip_www_init_finish, &world->www_initialized);
106 }
107
108
/*
 * raptor_www_init_common:
 * @skip_www_init_finish: non-0 to leave the lower level WWW library alone
 * @www_initialized: in/out flag recording that initialisation was done
 *
 * Shared implementation of raptor_www_init() and raptor_www_init_v2().
 *
 * Does nothing if *@www_initialized is already non-0.  Otherwise it
 * initialises libcurl (only when built with RAPTOR_WWW_LIBCURL and
 * @skip_www_init_finish is 0) and then sets *@www_initialized to 1,
 * whatever the library initialisation returned.
 *
 * Return value: 0, or the curl_global_init() result when it was called
 */
static int
raptor_www_init_common(int skip_www_init_finish, int *www_initialized)
{
  int status = 0;

  if(!*www_initialized) {
#ifdef RAPTOR_WWW_LIBCURL
    if(!skip_www_init_finish)
      status = curl_global_init(CURL_GLOBAL_ALL);
#else
    (void)skip_www_init_finish; /* only consulted for libcurl builds */
#endif

    *www_initialized = 1;
  }

  return status;
}
126
127
128 #ifndef RAPTOR_DISABLE_V1
129 /**
130 * raptor_www_no_www_library_init_finish:
131 *
132 * Do not initialise or finish the lower level WWW library.
133 *
134 * If this is called then the raptor_www library will neither
135 * initialise or terminate the lower level WWW library. Usually in
136 * raptor_init either curl_global_init (for libcurl)
137 * are called and in raptor_finish curl_global_cleanup is called.
138 *
139 * This allows the application finer control over these libraries such
140 * as setting other global options or potentially calling and terminating
141 * raptor several times. It does mean that applications which use
142 * this call must do their own extra work in order to allocate and free
143 * all resources to the system.
144 *
145 * This function must be called before raptor_init.
146 *
147 * See also: raptor_www_no_www_library_init_finish_v2()
148 *
149 **/
150 void
raptor_www_no_www_library_init_finish(void)151 raptor_www_no_www_library_init_finish(void)
152 {
153 raptor_www_skip_www_init_finish = 1;
154 }
155 #endif
156
157
158 /**
159 * raptor_www_no_www_library_init_finish_v2:
160 * @world: raptor_world object
161 *
162 * Do not initialise or finish the lower level WWW library.
163 *
164 * If this is called then the raptor_www library will neither
165 * initialise or terminate the lower level WWW library. Usually in
166 * raptor_world_open() either curl_global_init (for libcurl)
167 * are called and in raptor_finish curl_global_cleanup is called.
168 *
169 * This allows the application finer control over these libraries such
170 * as setting other global options or potentially calling and terminating
171 * raptor several times. It does mean that applications which use
172 * this call must do their own extra work in order to allocate and free
173 * all resources to the system.
174 *
175 * This function must be called before raptor_world_open().
176 *
177 **/
178 void
raptor_www_no_www_library_init_finish_v2(raptor_world * world)179 raptor_www_no_www_library_init_finish_v2(raptor_world* world)
180 {
181 world->www_skip_www_init_finish = 1;
182 }
183
184
185 #ifndef RAPTOR_DISABLE_V1
186 /**
187 * raptor_www_finish:
188 *
189 * Terminate the WWW class.
190 *
191 * Must be called to clean any resources used by the WWW implementation.
192 *
193 * See also: raptor_www_finish_v2()
194 **/
195 void
raptor_www_finish(void)196 raptor_www_finish(void)
197 {
198 raptor_www_finish_common(raptor_www_skip_www_init_finish);
199 }
200 #endif
201
202
203 /**
204 * raptor_www_finish_v2:
205 * @world: raptor_world object
206 *
207 * Terminate the WWW class.
208 *
209 * Must be called to clean any resources used by the WWW implementation.
210 *
211 * See also: raptor_www_finish()
212 **/
213 void
raptor_www_finish_v2(raptor_world * world)214 raptor_www_finish_v2(raptor_world* world)
215 {
216 raptor_www_finish_common(world->www_skip_www_init_finish);
217 }
218
219
/*
 * raptor_www_finish_common:
 * @skip_www_init_finish: non-0 to leave the lower level WWW library alone
 *
 * Shared implementation of raptor_www_finish() and raptor_www_finish_v2().
 *
 * Calls curl_global_cleanup() when built with RAPTOR_WWW_LIBCURL and
 * @skip_www_init_finish is 0; otherwise it does nothing.
 */
static void
raptor_www_finish_common(int skip_www_init_finish)
{
  if(skip_www_init_finish)
    return;

#ifdef RAPTOR_WWW_LIBCURL
  curl_global_cleanup();
#endif
}
229
230
231 #ifndef RAPTOR_DISABLE_V1
232 /**
233 * raptor_www_new_with_connection:
234 * @connection: external WWW connection object.
235 *
236 * Constructor - create a new #raptor_www object over an existing WWW connection.
237 *
238 * At present this only works with a libcurl CURL handle object
239 * when raptor is compiled with libcurl suppport. Otherwise the
240 * @connection is ignored. This allows such things as setting
241 * up special flags on the curl handle before passing into the constructor.
242 *
243 * raptor_init() MUST have been called before calling this function.
244 * Use raptor_www_new_with_connection_v2() if using raptor_world APIs.
245 *
246 * Return value: a new #raptor_www object or NULL on failure.
247 **/
248 raptor_www*
raptor_www_new_with_connection(void * connection)249 raptor_www_new_with_connection(void *connection)
250 {
251 return raptor_www_new_with_connection_v2(raptor_world_instance(), connection);
252 }
253 #endif
254
255
256 /**
257 * raptor_www_new_with_connection_v2:
258 * @world: raptor_world object
259 * @connection: external WWW connection object.
260 *
261 * Constructor - create a new #raptor_www object over an existing WWW connection.
262 *
263 * At present this only works with a libcurl CURL handle object
264 * when raptor is compiled with libcurl suppport. Otherwise the
265 * @connection is ignored. This allows such things as setting
266 * up special flags on the curl handle before passing into the constructor.
267 *
268 * Return value: a new #raptor_www object or NULL on failure.
269 **/
270 raptor_www*
raptor_www_new_with_connection_v2(raptor_world * world,void * connection)271 raptor_www_new_with_connection_v2(raptor_world* world, void *connection)
272 {
273 raptor_www* www=(raptor_www* )RAPTOR_CALLOC(www, 1, sizeof(raptor_www));
274 if(!www)
275 return NULL;
276
277 www->world=world;
278 www->type=NULL;
279 www->free_type=1; /* default is to free content type */
280 www->total_bytes=0;
281 www->failed=0;
282 www->status_code=0;
283 www->write_bytes=NULL;
284 www->content_type=NULL;
285 www->uri_filter=NULL;
286 www->connection_timeout=10;
287 www->cache_control=NULL;
288
289 #ifdef RAPTOR_WWW_LIBCURL
290 www->curl_handle=(CURL*)connection;
291 raptor_www_curl_init(www);
292 #endif
293 #ifdef RAPTOR_WWW_LIBXML
294 raptor_www_libxml_init(www);
295 #endif
296 #ifdef RAPTOR_WWW_LIBFETCH
297 raptor_www_libfetch_init(www);
298 #endif
299
300 www->error_handlers.locator=&www->locator;
301 raptor_error_handlers_init_v2(world, &www->error_handlers);
302
303 return www;
304 }
305
306
307 #ifndef RAPTOR_DISABLE_V1
308 /**
309 * raptor_www_new:
310 *
311 * Constructor - create a new #raptor_www object.
312 *
313 * raptor_init() MUST have been called before calling this function.
314 * Use raptor_www_new_v2() if using raptor_world APIs.
315 *
316 * Return value: a new #raptor_www or NULL on failure.
317 **/
318 raptor_www*
raptor_www_new(void)319 raptor_www_new(void)
320 {
321 return raptor_www_new_v2(raptor_world_instance());
322 }
323 #endif
324
325
326 /**
327 * raptor_www_new_v2:
328 * @world: raptor_world object
329 *
330 * Constructor - create a new #raptor_www object.
331 *
332 * Return value: a new #raptor_www or NULL on failure.
333 **/
334 raptor_www*
raptor_www_new_v2(raptor_world * world)335 raptor_www_new_v2(raptor_world* world)
336 {
337 return raptor_www_new_with_connection_v2(world, NULL);
338 }
339
340
341 /**
342 * raptor_www_free:
343 * @www: WWW object.
344 *
345 * Destructor - destroy a #raptor_www object.
346 **/
347 void
raptor_www_free(raptor_www * www)348 raptor_www_free(raptor_www* www)
349 {
350 /* free context */
351 if(www->type) {
352 if(www->free_type)
353 RAPTOR_FREE(cstring, www->type);
354 www->type=NULL;
355 }
356
357 if(www->user_agent) {
358 RAPTOR_FREE(cstring, www->user_agent);
359 www->user_agent=NULL;
360 }
361
362 if(www->cache_control) {
363 RAPTOR_FREE(cstring, www->cache_control);
364 www->cache_control=NULL;
365 }
366
367 if(www->proxy) {
368 RAPTOR_FREE(cstring, www->proxy);
369 www->proxy=NULL;
370 }
371
372 if(www->http_accept) {
373 RAPTOR_FREE(cstring, www->http_accept);
374 www->http_accept=NULL;
375 }
376
377 #ifdef RAPTOR_WWW_LIBCURL
378 raptor_www_curl_free(www);
379 #endif
380 #ifdef RAPTOR_WWW_LIBXML
381 raptor_www_libxml_free(www);
382 #endif
383 #ifdef RAPTOR_WWW_LIBFETCH
384 raptor_www_libfetch_free(www);
385 #endif
386
387 if(www->uri)
388 raptor_free_uri_v2(www->world, www->uri);
389
390 if(www->final_uri)
391 raptor_free_uri_v2(www->world, www->final_uri);
392
393 RAPTOR_FREE(www, www);
394 }
395
396
397
398 /**
399 * raptor_www_set_error_handler:
400 * @www: WWW object
401 * @error_handler: error handler function
402 * @error_data: error handler data
403 *
404 * Set the error handler routine for the raptor_www class.
405 *
406 * This takes the same arguments as the raptor_parser_set_error() and
407 * raptor_parser_set_warning_handler() methods.
408 **/
409 void
raptor_www_set_error_handler(raptor_www * www,raptor_message_handler error_handler,void * error_data)410 raptor_www_set_error_handler(raptor_www* www,
411 raptor_message_handler error_handler,
412 void *error_data)
413 {
414 www->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data=error_data;
415 www->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler=error_handler;
416 }
417
418
419 /**
420 * raptor_www_set_write_bytes_handler:
421 * @www: WWW object
422 * @handler: bytes handler function
423 * @user_data: bytes handler data
424 *
425 * Set the handler to receive bytes written by the #raptor_www implementation.
426 *
427 **/
428 void
raptor_www_set_write_bytes_handler(raptor_www * www,raptor_www_write_bytes_handler handler,void * user_data)429 raptor_www_set_write_bytes_handler(raptor_www* www,
430 raptor_www_write_bytes_handler handler,
431 void *user_data)
432 {
433 www->write_bytes=handler;
434 www->write_bytes_userdata=user_data;
435 }
436
437
438 /**
439 * raptor_www_set_content_type_handler:
440 * @www: WWW object
441 * @handler: content type handler function
442 * @user_data: content type handler data
443 *
444 * Set the handler to receive the HTTP Content-Type header value.
445 *
446 * This is called if or when the value is discovered during retrieval
447 * by the raptor_www implementation. Not all implementations provide
448 * access to this.
449 **/
450 void
raptor_www_set_content_type_handler(raptor_www * www,raptor_www_content_type_handler handler,void * user_data)451 raptor_www_set_content_type_handler(raptor_www* www,
452 raptor_www_content_type_handler handler,
453 void *user_data)
454 {
455 www->content_type=handler;
456 www->content_type_userdata=user_data;
457 }
458
459
460 /**
461 * raptor_www_set_user_agent:
462 * @www: WWW object
463 * @user_agent: User-Agent string
464 *
465 * Set the user agent value, for HTTP requests typically.
466 **/
467 void
raptor_www_set_user_agent(raptor_www * www,const char * user_agent)468 raptor_www_set_user_agent(raptor_www* www, const char *user_agent)
469 {
470 char *ua_copy=NULL;
471
472 if(!user_agent || !*user_agent) {
473 www->user_agent=NULL;
474 return;
475 }
476
477 ua_copy=(char*)RAPTOR_MALLOC(cstring, strlen(user_agent)+1);
478 if(!ua_copy)
479 return;
480 strcpy(ua_copy, user_agent);
481
482 www->user_agent=ua_copy;
483 }
484
485
486 /**
487 * raptor_www_set_proxy:
488 * @www: WWW object
489 * @proxy: proxy string.
490 *
491 * Set the proxy for the WWW object.
492 *
493 * The @proxy usually a string of the form http://server.domain:port.
494 **/
495 void
raptor_www_set_proxy(raptor_www * www,const char * proxy)496 raptor_www_set_proxy(raptor_www* www, const char *proxy)
497 {
498 char *proxy_copy;
499
500 if(!proxy)
501 return;
502
503 proxy_copy=(char*)RAPTOR_MALLOC(cstring, strlen(proxy)+1);
504 if(!proxy_copy)
505 return;
506 strcpy(proxy_copy, proxy);
507
508 www->proxy=proxy_copy;
509 }
510
511
512 /**
513 * raptor_www_set_http_accept:
514 * @www: #raptor_www class
515 * @value: Accept: header value or NULL to have an empty one.
516 *
517 * Set HTTP Accept header.
518 *
519 **/
520 void
raptor_www_set_http_accept(raptor_www * www,const char * value)521 raptor_www_set_http_accept(raptor_www* www, const char *value)
522 {
523 char *value_copy;
524 size_t len=8; /* strlen("Accept:")+1 */
525
526 if(value)
527 len+=1+strlen(value); /* " "+value */
528
529 value_copy=(char*)RAPTOR_MALLOC(cstring, len);
530 if(!value_copy)
531 return;
532 www->http_accept=value_copy;
533
534 strcpy(value_copy, "Accept:");
535 value_copy+=7;
536 if(value) {
537 *value_copy++=' ';
538 strcpy(value_copy, value);
539 }
540
541 #if RAPTOR_DEBUG > 1
542 RAPTOR_DEBUG2("Using Accept header: '%s'\n", www->http_accept);
543 #endif
544 }
545
546
547 /**
548 * raptor_www_set_connection_timeout:
549 * @www: WWW object
550 * @timeout: Timeout in seconds
551 *
552 * Set WWW connection timeout
553 **/
554 void
raptor_www_set_connection_timeout(raptor_www * www,int timeout)555 raptor_www_set_connection_timeout(raptor_www* www, int timeout)
556 {
557 www->connection_timeout=timeout;
558 }
559
560
561 /**
562 * raptor_www_set_http_cache_control:
563 * @www: WWW object
564 * @cache_control: Cache-Control header value (or NULL to disable)
565 *
566 * Set HTTP Cache-Control:header (default none)
567 *
568 * The @cache_control value can be a string to set it, "" to send
569 * a blank header or NULL to not set the header at all.
570 *
571 * Return value: non-0 on failure
572 **/
573 int
raptor_www_set_http_cache_control(raptor_www * www,const char * cache_control)574 raptor_www_set_http_cache_control(raptor_www* www, const char* cache_control)
575 {
576 char *cache_control_copy;
577 const char* const header="Cache-Control:";
578 const size_t header_len=14; /* strlen("Cache-Control:") */
579 size_t len;
580
581 RAPTOR_ASSERT((strlen(header) != header_len), "Cache-Control header length is wrong");
582
583 if(www->cache_control) {
584 RAPTOR_FREE(cstring, www->cache_control);
585 www->cache_control=NULL;
586 }
587
588 if(!cache_control) {
589 www->cache_control=NULL;
590 return 0;
591 }
592
593 len=header_len + 1 +strlen(cache_control) + 1; /* header+" "+cache_control+"\0" */
594
595 cache_control_copy=(char*)RAPTOR_MALLOC(cstring, len);
596 if(!cache_control_copy)
597 return 1;
598
599 www->cache_control=cache_control_copy;
600
601 strncpy(cache_control_copy, header, header_len);
602 cache_control_copy+= header_len;
603 if(*cache_control) {
604 *cache_control_copy++=' ';
605 strcpy(cache_control_copy, cache_control);
606 }
607
608 #if RAPTOR_DEBUG > 1
609 RAPTOR_DEBUG2("Using Cache-Control header: '%s'\n", www->cache_control);
610 #endif
611
612 return 0;
613 }
614
615
616 /**
617 * raptor_www_set_uri_filter:
618 * @www: WWW object
619 * @filter: URI filter function
620 * @user_data: User data to pass to filter function
621 *
622 * Set URI filter function for WWW retrieval.
623 **/
624 void
raptor_www_set_uri_filter(raptor_www * www,raptor_uri_filter_func filter,void * user_data)625 raptor_www_set_uri_filter(raptor_www* www,
626 raptor_uri_filter_func filter,
627 void *user_data)
628 {
629 www->uri_filter=filter;
630 www->uri_filter_user_data=user_data;
631 }
632
633
634 /**
635 * raptor_www_get_connection:
636 * @www: #raptor_www object
637 *
638 * Get WWW library connection object.
639 *
640 * Return the internal WWW connection handle. For libcurl, this
641 * returns the CURL handle and for libxml the context. Otherwise
642 * it returns NULL.
643 *
644 * Return value: connection pointer
645 **/
646 void*
raptor_www_get_connection(raptor_www * www)647 raptor_www_get_connection(raptor_www* www)
648 {
649 #ifdef RAPTOR_WWW_NONE
650 return NULL;
651 #endif
652
653 #ifdef RAPTOR_WWW_LIBCURL
654 return www->curl_handle;
655 #endif
656
657 #ifdef RAPTOR_WWW_LIBXML
658 return www->ctxt;
659 #endif
660
661 #ifdef RAPTOR_WWW_LIBFETCH
662 return NULL;
663 #endif
664
665 return NULL;
666 }
667
668
669 /**
670 * raptor_www_abort:
671 * @www: WWW object
672 * @reason: abort reason message
673 *
674 * Abort an ongoing raptor WWW operation and pass back a reason.
675 *
676 * This is typically used within one of the raptor WWW handlers
677 * when retrieval need no longer continue due to another
678 * processing issue or error.
679 **/
680 void
raptor_www_abort(raptor_www * www,const char * reason)681 raptor_www_abort(raptor_www* www, const char *reason)
682 {
683 www->failed=1;
684 }
685
686
687 void
raptor_www_error(raptor_www * www,const char * message,...)688 raptor_www_error(raptor_www* www, const char *message, ...)
689 {
690 va_list arguments;
691
692 va_start(arguments, message);
693
694 raptor_log_error_varargs(www->world,
695 RAPTOR_LOG_LEVEL_ERROR,
696 www->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
697 www->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data,
698 &www->locator,
699 message, arguments);
700
701 va_end(arguments);
702 }
703
704
705 static int
raptor_www_file_handle_fetch(raptor_www * www,FILE * fh)706 raptor_www_file_handle_fetch(raptor_www* www, FILE* fh)
707 {
708 unsigned char buffer[RAPTOR_WWW_BUFFER_SIZE+1];
709
710 while(!feof(fh)) {
711 int len=fread(buffer, 1, RAPTOR_WWW_BUFFER_SIZE, fh);
712 if(len > 0) {
713 www->total_bytes += len;
714 buffer[len]='\0';
715
716 if(www->write_bytes)
717 www->write_bytes(www, www->write_bytes_userdata, buffer, len, 1);
718 }
719
720 if(feof(fh) || www->failed)
721 break;
722 }
723
724 if(!www->failed)
725 www->status_code=200;
726
727 return www->failed;
728 }
729
730
731 static int
raptor_www_file_fetch(raptor_www * www)732 raptor_www_file_fetch(raptor_www* www)
733 {
734 char *filename;
735 FILE *fh;
736 unsigned char *uri_string=raptor_uri_as_string_v2(www->world, www->uri);
737 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
738 struct stat buf;
739 #endif
740
741 www->status_code=200;
742
743 filename=raptor_uri_uri_string_to_filename(uri_string);
744 if(!filename) {
745 raptor_www_error(www, "Not a file: URI");
746 return 1;
747 }
748
749 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
750 if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
751 raptor_www_error(www, "Cannot read from a directory '%s'", filename);
752 RAPTOR_FREE(cstring, filename);
753 www->status_code=404;
754 return 1;
755 }
756 #endif
757
758 fh=fopen(filename, "rb");
759 if(!fh) {
760 raptor_www_error(www, "file '%s' open failed - %s",
761 filename, strerror(errno));
762 RAPTOR_FREE(cstring, filename);
763 www->status_code=(errno == EACCES) ? 403: 404;
764 www->failed=1;
765
766 return www->failed;
767 }
768
769 raptor_www_file_handle_fetch(www, fh);
770 fclose(fh);
771
772 RAPTOR_FREE(cstring, filename);
773
774 return www->failed;
775 }
776
777
778 /**
779 * raptor_www_fetch:
780 * @www: WWW object
781 * @uri: URI to read from
782 *
783 * Start a WWW content retrieval for the given URI, returning data via the write_bytes handler.
784 *
785 * Return value: non-0 on failure.
786 **/
787 int
raptor_www_fetch(raptor_www * www,raptor_uri * uri)788 raptor_www_fetch(raptor_www *www, raptor_uri *uri)
789 {
790 int status=1;
791
792 www->uri=raptor_new_uri_for_retrieval_v2(www->world, uri);
793
794 www->locator.uri=uri;
795 www->locator.line= -1;
796 www->locator.column= -1;
797
798 if(www->uri_filter)
799 if(www->uri_filter(www->uri_filter_user_data, uri))
800 return status;
801
802 #ifdef RAPTOR_WWW_NONE
803 status=raptor_www_file_fetch(www);
804 #else
805
806 if(raptor_uri_uri_string_is_file_uri(raptor_uri_as_string_v2(www->world, www->uri)))
807 status=raptor_www_file_fetch(www);
808 else {
809 #ifdef RAPTOR_WWW_LIBCURL
810 status=raptor_www_curl_fetch(www);
811 #endif
812
813 #ifdef RAPTOR_WWW_LIBXML
814 status=raptor_www_libxml_fetch(www);
815 #endif
816
817 #ifdef RAPTOR_WWW_LIBFETCH
818 status=raptor_www_libfetch_fetch(www);
819 #endif
820 }
821
822 #endif
823 if(!status && www->status_code && www->status_code != 200){
824 raptor_www_error(www, "Resolving URI failed with HTTP status %d",
825 www->status_code);
826 status=1;
827 }
828
829 www->failed=status;
830
831 return www->failed;
832 }
833
834
835 static void
raptor_www_fetch_to_string_write_bytes(raptor_www * www,void * userdata,const void * ptr,size_t size,size_t nmemb)836 raptor_www_fetch_to_string_write_bytes(raptor_www* www, void *userdata,
837 const void *ptr, size_t size,
838 size_t nmemb)
839 {
840 raptor_stringbuffer* sb=(raptor_stringbuffer*)userdata;
841 int len=size*nmemb;
842
843 raptor_stringbuffer_append_counted_string(sb, (unsigned char*)ptr, len, 1);
844 }
845
846
847 /**
848 * raptor_www_fetch_to_string:
849 * @www: raptor_www object
850 * @uri: raptor_uri to retrieve
851 * @string_p: pointer to location to hold string
852 * @length_p: pointer to location to hold length of string (or NULL)
853 * @malloc_handler: pointer to malloc to use to make string (or NULL)
854 *
855 * Start a WWW content retrieval for the given URI, returning the data in a new string.
856 *
857 * If malloc_handler is null, raptor will allocate it using it's
858 * own memory allocator. *string_p is set to NULL on failure (and
859 * *length_p to 0 if length_p is not NULL).
860 *
861 * Return value: non-0 on failure
862 **/
863 RAPTOR_EXTERN_C
864 int
raptor_www_fetch_to_string(raptor_www * www,raptor_uri * uri,void ** string_p,size_t * length_p,void * (* malloc_handler)(size_t size))865 raptor_www_fetch_to_string(raptor_www *www, raptor_uri *uri,
866 void **string_p, size_t *length_p,
867 void *(*malloc_handler)(size_t size))
868 {
869 raptor_stringbuffer *sb=NULL;
870 void *str=NULL;
871 raptor_www_write_bytes_handler saved_write_bytes;
872 void *saved_write_bytes_userdata;
873
874 sb=raptor_new_stringbuffer();
875 if(!sb)
876 return 1;
877
878 if(length_p)
879 *length_p=0;
880
881 saved_write_bytes=www->write_bytes;
882 saved_write_bytes_userdata=www->write_bytes_userdata;
883 raptor_www_set_write_bytes_handler(www, raptor_www_fetch_to_string_write_bytes, sb);
884
885 if(raptor_www_fetch(www, uri))
886 str=NULL;
887 else {
888 size_t len=raptor_stringbuffer_length(sb);
889 if(len) {
890 str=(void*)malloc_handler(len+1);
891 if(str) {
892 raptor_stringbuffer_copy_to_string(sb, (unsigned char*)str, len+1);
893 *string_p=str;
894 if(length_p)
895 *length_p=len;
896 }
897 }
898 }
899
900 if(sb)
901 raptor_free_stringbuffer(sb);
902
903 raptor_www_set_write_bytes_handler(www, saved_write_bytes, saved_write_bytes_userdata);
904
905 return (str == NULL);
906 }
907
908
909 /**
910 * raptor_www_get_final_uri:
911 * @www: #raptor_www object
912 *
913 * Get the WWW final resolved URI.
914 *
915 * This returns the URI used after any protocol redirection.
916 *
917 * Return value: a new URI or NULL if not known.
918 **/
919 raptor_uri*
raptor_www_get_final_uri(raptor_www * www)920 raptor_www_get_final_uri(raptor_www* www)
921 {
922 return www->final_uri ? raptor_uri_copy_v2(www->world, www->final_uri) : NULL;
923 }
924
925
926 /**
927 * raptor_www_set_final_uri_handler:
928 * @www: WWW object
929 * @handler: content type handler function
930 * @user_data: content type handler data
931 *
932 * Set the handler to receive the HTTP Content-Type header value.
933 *
934 * This is called if or when the value is discovered during retrieval
935 * by the raptor_www implementation. Not all implementations provide
936 * access to this.
937 **/
938 void
raptor_www_set_final_uri_handler(raptor_www * www,raptor_www_final_uri_handler handler,void * user_data)939 raptor_www_set_final_uri_handler(raptor_www* www,
940 raptor_www_final_uri_handler handler,
941 void *user_data)
942 {
943 www->final_uri_handler=handler;
944 www->final_uri_userdata=user_data;
945 }
946