1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_json.c - Raptor JSON Parser
4  *
5  * RDF/JSON
6  * http://n2.talis.com/wiki/RDF_JSON_Specification
7  *
8  * Copyright (C) 2001-2010, David Beckett http://www.dajobe.org/
9  *
10  * This package is Free Software and part of Redland http://librdf.org/
11  *
12  * It is licensed under the following three licenses as alternatives:
13  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
14  *   2. GNU General Public License (GPL) V2 or any newer version
15  *   3. Apache License, V2.0 or any newer version
16  *
17  * You may not use this file except in compliance with at least one of
18  * the above three licenses.
19  *
20  * See LICENSE.html or LICENSE.txt at the top of this package for the
21  * complete terms and further detail along with the license texts for
22  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
23  *
24  *
25  */
26 
27 
28 #ifdef HAVE_CONFIG_H
29 #include <raptor_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #include <string.h>
34 #include <ctype.h>
35 #include <stdarg.h>
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #ifdef HAVE_STDLIB_H
40 #include <stdlib.h>
41 #endif
42 
43 #include <yajl/yajl_parse.h>
44 
45 /* Raptor includes */
46 #include "raptor2.h"
47 #include "raptor_internal.h"
48 
49 
/*
 * States of the RDF/JSON state machine that the YAJL map, array and
 * key callbacks move through.
 */
typedef enum {
  /* Outside any JSON structure; set when a parse is started */
  RAPTOR_JSON_STATE_ROOT = 0,
  /* Inside the top-level map; keys are "triples" or a subject */
  RAPTOR_JSON_STATE_MAP_ROOT,
  /* Seen the "triples" key; an array start is accepted next */
  RAPTOR_JSON_STATE_TRIPLES_KEY,
  /* Inside the triples array; each map started here is one triple */
  RAPTOR_JSON_STATE_TRIPLES_ARRAY,
  /* Inside a triple map; keys are "subject", "predicate", "object" */
  RAPTOR_JSON_STATE_TRIPLES_TRIPLE,
  /* Inside a term map of a triple; keys are term attributes */
  RAPTOR_JSON_STATE_TRIPLES_TERM,
  /* Seen a subject key in the top-level map; a map start is accepted next */
  RAPTOR_JSON_STATE_RESOURCES_SUBJECT_KEY,
  /* Inside a subject's map; keys are predicates */
  RAPTOR_JSON_STATE_RESOURCES_PREDICATE,
  /* Inside a predicate's array of objects */
  RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY,
  /* Inside an object map; keys are term attributes */
  RAPTOR_JSON_STATE_RESOURCES_OBJECT
} raptor_json_parse_state;
62 
/*
 * Which position of the current triple the term being parsed belongs to;
 * selected by the "subject" / "predicate" / "object" keys of a triple map.
 */
typedef enum {
  RAPTOR_JSON_TERM_UNKNOWN = 0,  /* no position selected yet */
  RAPTOR_JSON_TERM_SUBJECT,
  RAPTOR_JSON_TERM_PREDICATE,
  RAPTOR_JSON_TERM_OBJECT
} raptor_json_term;
69 
/*
 * Which attribute of a term map the next JSON string value belongs to;
 * selected by the "value" / "lang" / "type" / "datatype" keys.
 */
typedef enum {
  RAPTOR_JSON_ATTRIB_UNKNOWN = 0,  /* no (or an unrecognised) key seen */
  RAPTOR_JSON_ATTRIB_VALUE,
  RAPTOR_JSON_ATTRIB_LANG,
  RAPTOR_JSON_ATTRIB_TYPE,
  RAPTOR_JSON_ATTRIB_DATATYPE
} raptor_json_term_attrib;
77 
78 
/*
 * Integer type used for the length/size arguments of the YAJL callbacks
 * and allocator hooks in this file: size_t when HAVE_YAJL2 is defined,
 * unsigned int otherwise.
 *
 * When YAJL V1 support is dropped, this can be removed.
 */
#ifndef HAVE_YAJL2
#define RAPTOR_YAJL_LEN_TYPE unsigned int
#else
#define RAPTOR_YAJL_LEN_TYPE size_t
#endif
85 
86 /*
87  * JSON parser object
88  */
89 struct raptor_json_parser_context_s {
90 #ifdef HAVE_YAJL2
91 #else
92   yajl_parser_config config;
93 #endif
94   yajl_handle handle;
95 
96   /* Parser state */
97   raptor_json_parse_state state;
98   raptor_json_term        term;
99   raptor_json_term_attrib attrib;
100 
101   /* Temporary storage, while creating terms */
102   raptor_term_type term_type;
103   unsigned char*   term_value;
104   unsigned char*   term_datatype;
105   unsigned char*   term_lang;
106 
107   /* Temporary storage, while creating statements */
108   raptor_statement statement;
109 };
110 
111 typedef struct raptor_json_parser_context_s raptor_json_parser_context;
112 
113 
114 static void
raptor_json_reset_term(raptor_json_parser_context * context)115 raptor_json_reset_term(raptor_json_parser_context *context)
116 {
117   if(context->term_value)
118     RAPTOR_FREE(char*, context->term_value);
119   if(context->term_lang)
120     RAPTOR_FREE(char*, context->term_lang);
121   if(context->term_datatype)
122     RAPTOR_FREE(char*, context->term_datatype);
123 
124   context->term_value = NULL;
125   context->term_lang = NULL;
126   context->term_datatype = NULL;
127   context->term_type = RAPTOR_TERM_TYPE_UNKNOWN;
128   context->attrib = RAPTOR_JSON_ATTRIB_UNKNOWN;
129 }
130 
131 static unsigned char*
raptor_json_cstring_from_counted_string(raptor_parser * rdf_parser,const unsigned char * str,RAPTOR_YAJL_LEN_TYPE len)132 raptor_json_cstring_from_counted_string(raptor_parser *rdf_parser, const unsigned char* str, RAPTOR_YAJL_LEN_TYPE len)
133 {
134   unsigned char *cstr = RAPTOR_MALLOC(unsigned char*, len + 1);
135   if(!cstr) {
136     raptor_parser_fatal_error(rdf_parser, "Out of memory");
137     return NULL;
138   }
139 
140   memcpy(cstr, str, len);
141   cstr[len] = '\0';
142 
143   return cstr;
144 }
145 
146 static raptor_term*
raptor_json_new_term_from_counted_string(raptor_parser * rdf_parser,const unsigned char * str,size_t len)147 raptor_json_new_term_from_counted_string(raptor_parser *rdf_parser, const unsigned char* str, size_t len)
148 {
149   raptor_term *term = NULL;
150 
151   if(len > 2 && str[0] == '_' && str[1] == ':') {
152     const unsigned char *node_id = &str[2];
153     term = raptor_new_term_from_counted_blank(rdf_parser->world, node_id, len - 2);
154 
155   } else {
156     raptor_uri *uri = raptor_new_uri_from_counted_string(rdf_parser->world, str, len);
157     if(!uri) {
158       unsigned char* cstr = raptor_json_cstring_from_counted_string(rdf_parser, str, RAPTOR_BAD_CAST(int, len));
159       raptor_parser_error(rdf_parser, "Could not create uri from '%s'", cstr);
160       RAPTOR_FREE(char*, cstr);
161       return NULL;
162     }
163 
164     term = raptor_new_term_from_uri(rdf_parser->world, uri);
165     raptor_free_uri(uri);
166   }
167 
168   return term;
169 }
170 
171 
172 static raptor_term*
raptor_json_generate_term(raptor_parser * rdf_parser)173 raptor_json_generate_term(raptor_parser *rdf_parser)
174 {
175   raptor_json_parser_context *context = (raptor_json_parser_context*)rdf_parser->context;
176   raptor_term *term = NULL;
177 
178   if(!context->term_value) {
179     raptor_parser_error(rdf_parser, "No value for term defined");
180     return NULL;
181   }
182 
183   switch(context->term_type) {
184     case RAPTOR_TERM_TYPE_URI: {
185       raptor_uri *uri = raptor_new_uri(rdf_parser->world, context->term_value);
186       if(!uri) {
187         raptor_parser_error(rdf_parser, "Could not create uri from '%s'", context->term_value);
188         return NULL;
189       }
190       term = raptor_new_term_from_uri(rdf_parser->world, uri);
191       raptor_free_uri(uri);
192       break;
193     }
194     case RAPTOR_TERM_TYPE_LITERAL: {
195       raptor_uri *datatype_uri = NULL;
196       if(context->term_datatype) {
197         datatype_uri = raptor_new_uri(rdf_parser->world, context->term_datatype);
198       }
199       term = raptor_new_term_from_literal(rdf_parser->world, context->term_value, datatype_uri, context->term_lang);
200       if(datatype_uri)
201         raptor_free_uri(datatype_uri);
202       break;
203     }
204     case RAPTOR_TERM_TYPE_BLANK: {
205       unsigned char *node_id = context->term_value;
206       if(strlen((const char*)node_id) > 2 && node_id[0] == '_' && node_id[1] == ':') {
207           node_id = &node_id[2];
208       }
209       term = raptor_new_term_from_blank(rdf_parser->world, node_id);
210       break;
211     }
212     case RAPTOR_TERM_TYPE_UNKNOWN:
213       raptor_parser_error(rdf_parser, "No type for term defined");
214       break;
215   }
216 
217   return term;
218 }
219 
220 
raptor_json_yajl_null(void * ctx)221 static int raptor_json_yajl_null(void * ctx)
222 {
223   raptor_parser* rdf_parser = (raptor_parser*)ctx;
224   raptor_parser_error(rdf_parser, "Nulls are not valid in RDF/JSON");
225   return 0;
226 }
227 
raptor_json_yajl_boolean(void * ctx,int b)228 static int raptor_json_yajl_boolean(void * ctx, int b)
229 {
230   raptor_parser* rdf_parser = (raptor_parser*)ctx;
231   raptor_parser_error(rdf_parser, "Booleans are not valid in RDF/JSON");
232   return 0;
233 }
234 
235 #ifdef HAVE_YAJL2
236 #define YAJL_INTEGER_CALLBACK_ARG_TYPE long long
237 #else
238 #define YAJL_INTEGER_CALLBACK_ARG_TYPE long
239 #endif
raptor_json_yajl_integer(void * ctx,YAJL_INTEGER_CALLBACK_ARG_TYPE integerVal)240 static int raptor_json_yajl_integer(void * ctx,
241                                     YAJL_INTEGER_CALLBACK_ARG_TYPE integerVal)
242 {
243   raptor_parser* rdf_parser = (raptor_parser*)ctx;
244   raptor_parser_error(rdf_parser, "Integers are not valid in RDF/JSON");
245   return 0;
246 }
247 
raptor_json_yajl_double(void * ctx,double d)248 static int raptor_json_yajl_double(void * ctx, double d)
249 {
250   raptor_parser* rdf_parser = (raptor_parser*)ctx;
251   raptor_parser_error(rdf_parser, "Floats are not valid in RDF/JSON");
252   return 0;
253 }
254 
/*
 * Compare a counted (not NUL-terminated) string against a C string.
 * Non-zero only when the lengths are equal and every byte matches, so
 * an empty string or a mere prefix of @keyword never matches.
 */
static int
raptor_json_value_is(const unsigned char *str, RAPTOR_YAJL_LEN_TYPE len,
                     const char *keyword)
{
  size_t keyword_len = strlen(keyword);

  return (size_t)len == keyword_len && !memcmp(str, keyword, keyword_len);
}

/*
 * YAJL callback for a JSON string value.
 *
 * Strings are only accepted inside a term map (triples term or resource
 * object).  Depending on the attribute key seen last, the string is
 * stored as the term's value, language or datatype, or decoded as the
 * term type ("uri", "literal" or "bnode").
 *
 * Returns 1 to continue parsing, 0 to cancel: when the string appears
 * outside a term map or when a copy cannot be allocated.  An unknown
 * term type or unsupported attribute is reported but parsing continues.
 */
static int raptor_json_yajl_string(void * ctx, const unsigned char * str,
                                   RAPTOR_YAJL_LEN_TYPE len)
{
  raptor_parser* rdf_parser = (raptor_parser*)ctx;
  raptor_json_parser_context *context;
  unsigned char **slot = NULL;

  context = (raptor_json_parser_context*)rdf_parser->context;

  if(context->state != RAPTOR_JSON_STATE_TRIPLES_TERM &&
     context->state != RAPTOR_JSON_STATE_RESOURCES_OBJECT) {
    raptor_parser_error(rdf_parser, "Unexpected JSON string");
    return 0;
  }

  switch(context->attrib) {
    case RAPTOR_JSON_ATTRIB_VALUE:
      slot = &context->term_value;
      break;

    case RAPTOR_JSON_ATTRIB_LANG:
      slot = &context->term_lang;
      break;

    case RAPTOR_JSON_ATTRIB_DATATYPE:
      slot = &context->term_datatype;
      break;

    case RAPTOR_JSON_ATTRIB_TYPE:
      /* Exact comparison: strncmp() limited to len would also accept
       * "" or any prefix such as "u" or "lit" */
      if(raptor_json_value_is(str, len, "uri")) {
        context->term_type = RAPTOR_TERM_TYPE_URI;
      } else if(raptor_json_value_is(str, len, "literal")) {
        context->term_type = RAPTOR_TERM_TYPE_LITERAL;
      } else if(raptor_json_value_is(str, len, "bnode")) {
        context->term_type = RAPTOR_TERM_TYPE_BLANK;
      } else {
        unsigned char * cstr = raptor_json_cstring_from_counted_string(rdf_parser, str, len);
        context->term_type = RAPTOR_TERM_TYPE_UNKNOWN;
        /* A NULL copy means out of memory, already reported as fatal */
        if(!cstr)
          return 0;
        raptor_parser_error(rdf_parser, "Unknown term type: %s", cstr);
        RAPTOR_FREE(char*, cstr);
      }
      break;

    case RAPTOR_JSON_ATTRIB_UNKNOWN:
    default:
      raptor_parser_error(rdf_parser, "Unsupported term attribute in raptor_json_string");
      break;
  }

  if(slot) {
    unsigned char *copy = raptor_json_cstring_from_counted_string(rdf_parser, str, len);
    unsigned char *previous = *slot;

    /* Out of memory (already reported as fatal): stop the parse and
     * keep whatever was stored before */
    if(!copy)
      return 0;

    /* The same attribute may appear twice in one term map; release the
     * earlier string instead of leaking it */
    if(previous)
      RAPTOR_FREE(char*, previous);
    *slot = copy;
  }

  return 1;
}
299 
/*
 * Compare a counted (not NUL-terminated) map key against a C string.
 * Non-zero only when the lengths are equal and every byte matches, so
 * an empty key or a mere prefix of @name never matches.
 */
static int
raptor_json_key_is(const unsigned char *str, RAPTOR_YAJL_LEN_TYPE len,
                   const char *name)
{
  size_t name_len = strlen(name);

  return (size_t)len == name_len && !memcmp(str, name, name_len);
}

/*
 * YAJL callback for a JSON map key.
 *
 * What a key means depends on the current state:
 *  - top-level map: "triples" switches to the triples form, any other
 *    key is a subject (URI or "_:" blank node) of the resource form;
 *  - subject map: the key is a predicate;
 *  - triple map: "subject", "predicate" or "object" selects the
 *    position the next term fills;
 *  - term / object map: "value", "type", "datatype" or "lang" selects
 *    the attribute the next string fills.
 *
 * Returns 1 to continue parsing, 0 (after reporting an error, or when a
 * term cannot be created) to cancel it.
 */
static int raptor_json_yajl_map_key(void * ctx, const unsigned char * str,
                                    RAPTOR_YAJL_LEN_TYPE len)
{
  raptor_parser* rdf_parser = (raptor_parser*)ctx;
  raptor_json_parser_context *context;
  context = (raptor_json_parser_context*)rdf_parser->context;

  if(context->state == RAPTOR_JSON_STATE_MAP_ROOT) {
    /* Exact comparison: strncmp() limited to len would treat "" or "t"
     * as "triples" instead of as a subject */
    if(raptor_json_key_is(str, len, "triples")) {
      context->state = RAPTOR_JSON_STATE_TRIPLES_KEY;
      return 1;
    } else {
      /* Replace the subject left over from the previous resource */
      if(context->statement.subject)
        raptor_free_term(context->statement.subject);
      context->statement.subject = raptor_json_new_term_from_counted_string(rdf_parser, str, len);
      if(!context->statement.subject)
        return 0;
      context->state = RAPTOR_JSON_STATE_RESOURCES_SUBJECT_KEY;
      return 1;
    }
  } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_PREDICATE) {
    /* Replace the predicate left over from the previous property */
    if(context->statement.predicate)
      raptor_free_term(context->statement.predicate);
    context->statement.predicate = raptor_json_new_term_from_counted_string(rdf_parser, str, len);
    if(!context->statement.predicate)
      return 0;
    return 1;
  } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TRIPLE) {
    if(raptor_json_key_is(str, len, "subject")) {
      context->term = RAPTOR_JSON_TERM_SUBJECT;
      return 1;
    } else if(raptor_json_key_is(str, len, "predicate")) {
      context->term = RAPTOR_JSON_TERM_PREDICATE;
      return 1;
    } else if(raptor_json_key_is(str, len, "object")) {
      context->term = RAPTOR_JSON_TERM_OBJECT;
      return 1;
    } else {
      raptor_parser_error(rdf_parser, "Unexpected JSON key name in triple definition");
      return 0;
    }
  } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TERM ||
             context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT) {
    if(raptor_json_key_is(str, len, "value")) {
      context->attrib = RAPTOR_JSON_ATTRIB_VALUE;
      return 1;
    } else if(raptor_json_key_is(str, len, "type")) {
      context->attrib = RAPTOR_JSON_ATTRIB_TYPE;
      return 1;
    } else if(raptor_json_key_is(str, len, "datatype")) {
      context->attrib = RAPTOR_JSON_ATTRIB_DATATYPE;
      return 1;
    } else if(raptor_json_key_is(str, len, "lang")) {
      context->attrib = RAPTOR_JSON_ATTRIB_LANG;
      return 1;
    } else {
      context->attrib = RAPTOR_JSON_ATTRIB_UNKNOWN;
      raptor_parser_error(rdf_parser, "Unexpected key name in triple definition");
      return 0;
    }
  } else {
    raptor_parser_error(rdf_parser, "Unexpected JSON map key");
    return 0;
  }
}
365 
raptor_json_yajl_start_map(void * ctx)366 static int raptor_json_yajl_start_map(void * ctx)
367 {
368   raptor_parser* rdf_parser = (raptor_parser*)ctx;
369   raptor_json_parser_context *context;
370   context = (raptor_json_parser_context*)rdf_parser->context;
371 
372   if(context->state == RAPTOR_JSON_STATE_ROOT) {
373     context->state = RAPTOR_JSON_STATE_MAP_ROOT;
374     return 1;
375   } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_SUBJECT_KEY) {
376     context->state = RAPTOR_JSON_STATE_RESOURCES_PREDICATE;
377     return 1;
378   } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY) {
379     context->state = RAPTOR_JSON_STATE_RESOURCES_OBJECT;
380     return 1;
381   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_ARRAY) {
382     raptor_statement_clear(&context->statement);
383     context->term = RAPTOR_JSON_TERM_UNKNOWN;
384     context->state = RAPTOR_JSON_STATE_TRIPLES_TRIPLE;
385     return 1;
386   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TRIPLE) {
387     context->state = RAPTOR_JSON_STATE_TRIPLES_TERM;
388     raptor_json_reset_term(context);
389     return 1;
390   } else {
391     raptor_parser_error(rdf_parser, "Unexpected start of JSON map");
392     return 0;
393   }
394 }
395 
396 
raptor_json_yajl_end_map(void * ctx)397 static int raptor_json_yajl_end_map(void * ctx)
398 {
399   raptor_parser* rdf_parser = (raptor_parser*)ctx;
400   raptor_json_parser_context *context;
401   context = (raptor_json_parser_context*)rdf_parser->context;
402 
403   if(context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT) {
404     context->statement.object = raptor_json_generate_term(rdf_parser);
405     if(!context->statement.object)
406       return 0;
407 
408     /* Generate the statement */
409     (*rdf_parser->statement_handler)(rdf_parser->user_data, &context->statement);
410 
411     raptor_free_term(context->statement.object);
412     context->statement.object = NULL;
413     raptor_json_reset_term(context);
414 
415     context->state = RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY;
416     return 1;
417   } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_PREDICATE) {
418     context->state = RAPTOR_JSON_STATE_MAP_ROOT;
419     return 1;
420   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TERM) {
421     raptor_term *term = raptor_json_generate_term(rdf_parser);
422     if(!term)
423       return 0;
424 
425     /* Store the term in the statement */
426     switch(context->term) {
427       case RAPTOR_JSON_TERM_SUBJECT:
428         if(context->statement.subject)
429           raptor_free_term(context->statement.subject);
430         context->statement.subject = term;
431       break;
432       case RAPTOR_JSON_TERM_PREDICATE:
433         if(context->statement.predicate)
434           raptor_free_term(context->statement.predicate);
435         context->statement.predicate = term;
436       break;
437       case RAPTOR_JSON_TERM_OBJECT:
438         if(context->statement.object)
439           raptor_free_term(context->statement.object);
440         context->statement.object = term;
441       break;
442       case RAPTOR_JSON_TERM_UNKNOWN:
443       default:
444         raptor_parser_error(rdf_parser, "Unknown term in raptor_json_end_map");
445       break;
446     }
447 
448     context->state = RAPTOR_JSON_STATE_TRIPLES_TRIPLE;
449     raptor_json_reset_term(context);
450     return 1;
451   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TRIPLE) {
452     if(!context->statement.subject) {
453       raptor_parser_error(rdf_parser, "Triple is missing a subject term");
454       return 0;
455     } else if(!context->statement.predicate) {
456       raptor_parser_error(rdf_parser, "Triple is missing a predicate term");
457       return 0;
458     } else if(!context->statement.object) {
459       raptor_parser_error(rdf_parser, "Triple is missing a object term");
460       return 0;
461     } else {
462       /* Generate the statement */
463       (*rdf_parser->statement_handler)(rdf_parser->user_data, &context->statement);
464     }
465     raptor_statement_clear(&context->statement);
466     context->state = RAPTOR_JSON_STATE_TRIPLES_ARRAY;
467     return 1;
468   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_KEY) {
469     context->state = RAPTOR_JSON_STATE_MAP_ROOT;
470     return 1;
471   } else if(context->state == RAPTOR_JSON_STATE_MAP_ROOT) {
472     context->state = RAPTOR_JSON_STATE_ROOT;
473     return 1;
474   } else {
475     raptor_parser_error(rdf_parser, "Unexpected end of JSON map");
476     return 0;
477   }
478 }
479 
raptor_json_yajl_start_array(void * ctx)480 static int raptor_json_yajl_start_array(void * ctx)
481 {
482   raptor_parser* rdf_parser = (raptor_parser*)ctx;
483   raptor_json_parser_context *context;
484   context = (raptor_json_parser_context*)rdf_parser->context;
485 
486   if(context->state == RAPTOR_JSON_STATE_RESOURCES_PREDICATE) {
487     context->state = RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY;
488     return 1;
489   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_KEY) {
490     context->state = RAPTOR_JSON_STATE_TRIPLES_ARRAY;
491     return 1;
492   } else {
493     raptor_parser_error(rdf_parser, "Unexpected start of array");
494     return 0;
495   }
496 }
497 
raptor_json_yajl_end_array(void * ctx)498 static int raptor_json_yajl_end_array(void * ctx)
499 {
500   raptor_parser* rdf_parser = (raptor_parser*)ctx;
501   raptor_json_parser_context *context;
502   context = (raptor_json_parser_context*)rdf_parser->context;
503 
504   if(context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY) {
505     context->state = RAPTOR_JSON_STATE_RESOURCES_PREDICATE;
506     return 1;
507   } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_ARRAY) {
508     context->state = RAPTOR_JSON_STATE_MAP_ROOT;
509     return 1;
510   } else {
511     raptor_parser_error(rdf_parser, "Unexpected end of array");
512     return 0;
513   }
514 }
515 
516 
517 static void*
raptor_json_yajl_malloc(void * ctx,RAPTOR_YAJL_LEN_TYPE sz)518 raptor_json_yajl_malloc(void *ctx, RAPTOR_YAJL_LEN_TYPE sz)
519 {
520   return RAPTOR_MALLOC(void*, sz);
521 }
522 
523 static void*
raptor_json_yajl_realloc(void * ctx,void * ptr,RAPTOR_YAJL_LEN_TYPE sz)524 raptor_json_yajl_realloc(void *ctx, void * ptr, RAPTOR_YAJL_LEN_TYPE sz)
525 {
526   return RAPTOR_REALLOC(void*, ptr, sz);
527 }
528 
529 static void
raptor_json_yajl_free(void * ctx,void * ptr)530 raptor_json_yajl_free(void *ctx, void * ptr)
531 {
532   RAPTOR_FREE(char*, ptr);
533 }
534 
535 static yajl_alloc_funcs raptor_json_yajl_alloc_funcs = {
536   raptor_json_yajl_malloc,
537   raptor_json_yajl_realloc,
538   raptor_json_yajl_free,
539   NULL
540 };
541 
542 static yajl_callbacks raptor_json_yajl_callbacks = {
543   raptor_json_yajl_null,
544   raptor_json_yajl_boolean,
545   raptor_json_yajl_integer,
546   raptor_json_yajl_double,
547   NULL,
548   raptor_json_yajl_string,
549   raptor_json_yajl_start_map,
550   raptor_json_yajl_map_key,
551   raptor_json_yajl_end_map,
552   raptor_json_yajl_start_array,
553   raptor_json_yajl_end_array
554 };
555 
556 
557 
558 /**
559  * raptor_json_parse_init:
560  *
561  * Initialise the Raptor JSON parser.
562  *
563  * Return value: non 0 on failure
564  **/
565 
566 static int
raptor_json_parse_init(raptor_parser * rdf_parser,const char * name)567 raptor_json_parse_init(raptor_parser* rdf_parser, const char *name)
568 {
569   raptor_json_parser_context *context;
570   context = (raptor_json_parser_context*)rdf_parser->context;
571 
572   /* Initialse the static statement */
573   raptor_statement_init(&context->statement, rdf_parser->world);
574 
575   /* Configure the parser */
576 #ifdef HAVE_YAJL2
577 #else
578   context->config.allowComments = 1;
579   context->config.checkUTF8 = 0;
580 #endif
581 
582   return 0;
583 }
584 
585 
586 /*
587  * raptor_json_parse_terminate - Free the Raptor JSON parser
588  * @rdf_parser: parser object
589  *
590  **/
591 static void
raptor_json_parse_terminate(raptor_parser * rdf_parser)592 raptor_json_parse_terminate(raptor_parser* rdf_parser)
593 {
594   raptor_json_parser_context *context;
595   context = (raptor_json_parser_context*)rdf_parser->context;
596 
597   if(context->handle)
598     yajl_free(context->handle);
599 
600   raptor_json_reset_term(context);
601   raptor_statement_clear(&context->statement);
602 }
603 
604 
605 
606 static int
raptor_json_parse_chunk(raptor_parser * rdf_parser,const unsigned char * s,size_t len,int is_end)607 raptor_json_parse_chunk(raptor_parser* rdf_parser,
608                         const unsigned char *s, size_t len,
609                         int is_end)
610 {
611   raptor_json_parser_context *context = (raptor_json_parser_context*)rdf_parser->context;
612   yajl_status status;
613   int result = 0;
614 
615   if(len) {
616     /* Parse the chunk passed to us */
617     status = yajl_parse(context->handle, s, RAPTOR_BAD_CAST(int, len));
618 
619     if(status != yajl_status_ok
620 #ifdef HAVE_YAJL2
621 #else
622        && status != yajl_status_insufficient_data
623 #endif
624     )
625     {
626       unsigned char * str = yajl_get_error(context->handle, 1, s, RAPTOR_BAD_CAST(int, len));
627       raptor_parser_error(rdf_parser, "YAJL error: %s", (const char *) str);
628       result = 1;
629       yajl_free_error(context->handle, str);
630     }
631   }
632 
633   if(is_end) {
634     /* parse any remaining buffered data */
635 #ifdef HAVE_YAJL2
636 #else
637 #define yajl_complete_parse(h) yajl_parse_complete(h)
638 #endif
639     status = yajl_complete_parse(context->handle);
640 
641     if(status != yajl_status_ok)
642     {
643       unsigned char * str = yajl_get_error(context->handle, 0, NULL, 0);
644       raptor_parser_error(rdf_parser, "YAJL error: %s", (const char *) str);
645       result = 1;
646       yajl_free_error(context->handle, str);
647     }
648 
649     raptor_json_reset_term(context);
650     raptor_statement_clear(&context->statement);
651   }
652 
653   return result;
654 }
655 
656 
657 static int
raptor_json_parse_start(raptor_parser * rdf_parser)658 raptor_json_parse_start(raptor_parser* rdf_parser)
659 {
660   raptor_json_parser_context *context = (raptor_json_parser_context*)rdf_parser->context;
661 
662   /* Destroy the old parser */
663   if(context->handle)
664     yajl_free(context->handle);
665 
666   /* Initialise a new parser */
667   context->handle = yajl_alloc(
668     &raptor_json_yajl_callbacks,
669 #ifdef HAVE_YAJL2
670 #else
671     &context->config,
672 #endif
673     &raptor_json_yajl_alloc_funcs,
674     (void *)rdf_parser
675   );
676 
677   if(!context->handle) {
678     raptor_parser_fatal_error(rdf_parser, "Failed to initialise YAJL parser");
679     return 1;
680   }
681 
682   /* Initialise the parse state */
683 #ifdef HAVE_YAJL2
684   yajl_config(context->handle, yajl_allow_comments, 1);
685   yajl_config(context->handle, yajl_dont_validate_strings, 1);
686 #else
687 #endif
688 
689   context->state = RAPTOR_JSON_STATE_ROOT;
690   raptor_json_reset_term(context);
691   raptor_statement_clear(&context->statement);
692 
693   return 0;
694 }
695 
696 
697 static int
raptor_json_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)698 raptor_json_parse_recognise_syntax(raptor_parser_factory* factory,
699                                        const unsigned char *buffer, size_t len,
700                                        const unsigned char *identifier,
701                                        const unsigned char *suffix,
702                                        const char *mime_type)
703 {
704   unsigned int pos = 0;
705   int score = 0;
706 
707   if(suffix) {
708     if(!strcmp((const char*)suffix, "json"))
709       score = 8;
710     if(!strcmp((const char*)suffix, "js"))
711       score = 3;
712   } else if(identifier) {
713     if(strstr((const char*)identifier, "json"))
714       score = 4;
715   }
716 
717   if(mime_type && strstr((const char*)mime_type, "json"))
718       score += 6;
719 
720   /* Is the first non-whitespace character a curly brace? */
721   while(pos<len) {
722     if(isspace(buffer[pos])) {
723       pos++;
724     } else {
725       if(buffer[pos] == '{')
726         score += 2;
727       break;
728     }
729   }
730 
731   return score;
732 }
733 
734 
/* NULL-terminated list of names this syntax is registered under */
static const char* const json_names[2] = {
  "json",
  NULL
};
736 
/* NULL-terminated list of URIs identifying this syntax */
static const char* const json_uri_strings[2] = {
  "http://docs.api.talis.com/platform-api/output-types/rdf-json", NULL
};
741 
742 #define JSON_TYPES_COUNT 2
743 static const raptor_type_q json_types[JSON_TYPES_COUNT + 1] = {
744   { "application/json", 16, 1},
745   { "text/json", 9, 1},
746   { NULL, 0, 0}
747 };
748 
749 static int
raptor_json_parser_register_factory(raptor_parser_factory * factory)750 raptor_json_parser_register_factory(raptor_parser_factory *factory)
751 {
752   int rc = 0;
753 
754   factory->desc.names = json_names;
755 
756   factory->desc.mime_types = json_types;
757 
758   factory->desc.label = "RDF/JSON (either Triples or Resource-Centric)";
759   factory->desc.uri_strings = json_uri_strings;
760 
761   factory->desc.flags = 0;
762 
763   factory->context_length     = sizeof(raptor_json_parser_context);
764 
765   factory->init      = raptor_json_parse_init;
766   factory->terminate = raptor_json_parse_terminate;
767   factory->start     = raptor_json_parse_start;
768   factory->chunk     = raptor_json_parse_chunk;
769   factory->recognise_syntax = raptor_json_parse_recognise_syntax;
770 
771   return rc;
772 }
773 
774 
775 int
raptor_init_parser_json(raptor_world * world)776 raptor_init_parser_json(raptor_world* world)
777 {
778   return !raptor_world_register_parser_factory(world,
779                                                &raptor_json_parser_register_factory);
780 }
781