1 /*
2  * Copyright 2006-2011 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "stdafx.h"
17 
18 #include <sstream>
19 #include <string>
20 
21 #include <zorba/diagnostic_list.h>
22 
23 #include "runtime/json/json.h"
24 #include "store/api/item_factory.h"
25 #include "system/globalenv.h"
26 #include "types/root_typemanager.h"
27 #include "util/ascii_util.h"
28 #include "util/cxx_util.h"
29 #include "util/indent.h"
30 #include "util/json_parser.h"
31 #include "util/json_util.h"
32 #include "util/mem_streambuf.h"
33 #include "util/omanip.h"
34 #include "util/oseparator.h"
35 #include "util/stl_util.h"
36 
37 #include "snelson.h"
38 
39 #define SNELSON_NS "http://john.snelson.org.uk/parsing-json-into-xquery"
40 
41 using namespace std;
42 
43 namespace zorba {
44 
45 ///////////////////////////////////////////////////////////////////////////////
46 
add_type_attribute(store::Item * parent,char const * value)47 static void add_type_attribute( store::Item *parent, char const *value ) {
48   store::Item_t junk_item, att_name, type_name, value_item;
49   GENV_ITEMFACTORY->createQName( att_name, "", "", "type" );
50   type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
51   zstring value_string( value );
52   GENV_ITEMFACTORY->createString( value_item, value_string );
53   GENV_ITEMFACTORY->createAttributeNode(
54     junk_item, parent, att_name, type_name, value_item
55   );
56 }
57 
/**
 * Adds a <code>type</code> attribute with value \a T to \c cur_item, but only
 * while the enclosing function's \c needs_type_attribute flag is set; the flag
 * is cleared afterwards so the attribute is added at most once per element.
 * Expands to a single statement and relies on \c cur_item and
 * \c needs_type_attribute being in scope at the point of use.
 */
#define ADD_TYPE_ATTRIBUTE(T)             \
  do {                                    \
    if ( !needs_type_attribute )          \
      break;                              \
    add_type_attribute( cur_item, T );    \
    needs_type_attribute = false;         \
  } while (0)
65 
add_item_element(item_stack_type & item_stack,state_stack_type & state_stack,store::Item_t & cur_item,char const * type)66 static void add_item_element( item_stack_type &item_stack,
67                               state_stack_type &state_stack,
68                               store::Item_t &cur_item,
69                               char const *type ) {
70   store::Item_t element_name, type_name;
71   zstring base_uri;
72   store::NsBindings ns_bindings;
73   GENV_ITEMFACTORY->createQName( element_name, SNELSON_NS, "", "item" );
74   type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
75   GENV_ITEMFACTORY->createElementNode(
76     cur_item, item_stack.top(),
77     element_name, type_name, false, false, ns_bindings, base_uri
78   );
79   add_type_attribute( cur_item.getp(), type );
80   PUSH_ITEM( cur_item );
81 }
82 
/**
 * When the parser is currently inside an array, wraps the upcoming value in a
 * new <code>item</code> element whose <code>type</code> attribute is \a T;
 * otherwise does nothing.  Expands to a single statement and relies on
 * \c item_stack, \c state_stack and \c cur_item being in scope.
 */
#define ADD_ITEM_ELEMENT(T)                                     \
  do {                                                          \
    if ( IN_STATE( in_array ) )                                 \
      add_item_element( item_stack, state_stack, cur_item, T ); \
  } while (0)
86 
/**
 * Counterpart of ADD_ITEM_ELEMENT: when the parser is currently inside an
 * array, pops the item pushed for the value just handled; otherwise does
 * nothing.  Expands to a single statement.
 */
#define POP_ITEM_ELEMENT()        \
  do {                            \
    if ( IN_STATE( in_array ) )   \
      POP_ITEM();                 \
  } while (0)
89 
90 ///////////////////////////////////////////////////////////////////////////////
91 
92 namespace snelson {
93 
parse(json::parser & p,store::Item_t * result)94 void parse( json::parser &p, store::Item_t *result ) {
95   ZORBA_ASSERT( result );
96 
97   store::Item_t cur_item, junk_item, value_item;
98   store::Item_t att_name, element_name, type_name;
99 
100   zstring base_uri;
101   bool got_something = false;
102   item_stack_type item_stack;
103   bool needs_type_attribute = false;
104   bool next_string_is_key = false;
105   store::NsBindings ns_bindings;
106   state_stack_type state_stack;
107   zstring value;
108 
109   json::token token;
110   while ( p.next( &token ) ) {
111     got_something = true;
112 
113     if ( !*result ) {
114       GENV_ITEMFACTORY->createQName( element_name, SNELSON_NS, "", "json" );
115       type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
116       GENV_ITEMFACTORY->createElementNode(
117         cur_item, nullptr,
118         element_name, type_name, false, false, ns_bindings, base_uri
119       );
120       *result = cur_item;
121       needs_type_attribute = true;
122       PUSH_ITEM( cur_item );
123     }
124 
125     switch ( token.get_type() ) {
126 
127       case '[':
128         if ( IN_STATE( in_object ) )
129           PUSH_ITEM( cur_item );
130         ADD_TYPE_ATTRIBUTE( "array" );
131         ADD_ITEM_ELEMENT( "array" );
132         PUSH_STATE( in_array );
133         break;
134 
135       case '{':
136         if ( IN_STATE( in_object ) )
137           PUSH_ITEM( cur_item );
138         ADD_TYPE_ATTRIBUTE( "object" );
139         ADD_ITEM_ELEMENT( "object" );
140         PUSH_STATE( in_object );
141         next_string_is_key = true;
142         break;
143 
144       case ']':
145       case '}':
146         POP_STATE();
147         POP_ITEM_ELEMENT();
148         if ( IN_STATE( in_object ) )
149           POP_ITEM();
150         break;
151 
152       case ',':
153         next_string_is_key = IN_STATE( in_object );
154         break;
155 
156       case json::token::number:
157         ADD_TYPE_ATTRIBUTE( "number" );
158         ADD_ITEM_ELEMENT( "number" );
159         value = token.get_value();
160         GENV_ITEMFACTORY->createTextNode( junk_item, cur_item, value );
161         POP_ITEM_ELEMENT();
162         break;
163 
164       case json::token::string:
165         ADD_TYPE_ATTRIBUTE( "string" );
166         value = token.get_value();
167         if ( next_string_is_key ) {
168           // <pair name="..." ...>
169           GENV_ITEMFACTORY->createQName( element_name, SNELSON_NS, "", "pair" );
170           type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
171           GENV_ITEMFACTORY->createElementNode(
172             cur_item, item_stack.top(),
173             element_name, type_name, false, false, ns_bindings, base_uri
174           );
175 
176           GENV_ITEMFACTORY->createQName( att_name, "", "", "name" );
177           type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
178           GENV_ITEMFACTORY->createString( value_item, value );
179           GENV_ITEMFACTORY->createAttributeNode(
180             junk_item, cur_item, att_name, type_name, value_item
181           );
182 
183           needs_type_attribute = true;
184           next_string_is_key = false;
185         } else {
186           ADD_ITEM_ELEMENT( "string" );
187           GENV_ITEMFACTORY->createTextNode( junk_item, cur_item, value );
188           POP_ITEM_ELEMENT();
189         }
190         break;
191 
192       case 'F':
193       case 'T':
194         ADD_TYPE_ATTRIBUTE( "boolean" );
195         ADD_ITEM_ELEMENT( "boolean" );
196         value = token.get_type() == 'F' ? "false" : "true";
197         GENV_ITEMFACTORY->createTextNode( junk_item, cur_item, value );
198         POP_ITEM_ELEMENT();
199         break;
200 
201       case json::token::json_null:
202         ADD_TYPE_ATTRIBUTE( "null" );
203         ADD_ITEM_ELEMENT( "null" );
204         POP_ITEM_ELEMENT();
205         break;
206 
207       case ':':
208       case json::token::none:
209         break;
210 
211       default:
212         assert( false );
213     } // switch
214   } // while
215   if ( !got_something )
216     throw XQUERY_EXCEPTION( zerr::ZJPE0009_ILLEGAL_EMPTY_STRING );
217 }
218 
219 } // namespace snelson
220 
221 ///////////////////////////////////////////////////////////////////////////////
222 
assert_json_type(json::type t,zstring const & s)223 static void assert_json_type( json::type t, zstring const &s ) {
224   // Doing it this way uses the string data in-place with no copy.
225   mem_streambuf::char_type *const p =
226     const_cast<mem_streambuf::char_type*>( s.data() );
227   mem_streambuf buf( p, s.size() );
228   istringstream iss;
229   iss.ios::rdbuf( &buf );
230 
231   json::lexer lex( iss );
232   json::token token;
233   try {
234     if ( lex.next( &token ) && json::map_type( token.get_type() ) == t )
235       return;
236   }
237   catch ( json::exception const& ) {
238     // do nothing
239   }
240   throw XQUERY_EXCEPTION(
241     zerr::ZJSE0008_BAD_VALUE,
242     ERROR_PARAMS( s, t )
243   );
244 }
245 
require_attribute_value(store::Item_t const & element,char const * att_name,zstring * att_value)246 static void require_attribute_value( store::Item_t const &element,
247                                      char const *att_name,
248                                      zstring *att_value ) {
249   if ( !get_attribute_value( element, att_name, att_value ) )
250     throw XQUERY_EXCEPTION(
251       zerr::ZJSE0002_ELEMENT_MISSING_ATTRIBUTE,
252       ERROR_PARAMS( element->getNodeName()->getStringValue(), att_name )
253     );
254 }
255 
get_json_type(store::Item_t const & element,bool allow_all_types=true)256 static json::type get_json_type( store::Item_t const &element,
257                                  bool allow_all_types = true ) {
258   zstring att_value;
259   require_attribute_value( element, "type", &att_value );
260   if ( att_value == "array" )
261     return json::array;
262   if ( att_value == "object" )
263     return json::object;
264   if ( allow_all_types ) {
265     if ( att_value == "boolean" )
266       return json::boolean;
267     if ( att_value == "null" )
268       return json::null;
269     if ( att_value == "number" )
270       return json::number;
271     if ( att_value == "string" )
272       return json::string;
273   }
274   throw XQUERY_EXCEPTION(
275     zerr::ZJSE0003_BAD_ATTRIBUTE_VALUE,
276     ERROR_PARAMS( att_value, "type" )
277   );
278 }
279 
if_space_or_newline(std::ostream & o,whitespace::type ws)280 inline std::ostream& if_space_or_newline( std::ostream &o,
281                                           whitespace::type ws ) {
282   if ( ws == whitespace::some )
283     o << ' ';
284   else
285     o << if_emit( ws == whitespace::indent, '\n' );
286   return o;
287 }
DEF_OMANIP1(if_space_or_newline,whitespace::type)288 DEF_OMANIP1( if_space_or_newline, whitespace::type )
289 
290 static ostream& serialize_begin( ostream &o, json::type t,
291                                  whitespace::type ws ) {
292   switch ( t ) {
293     case json::array :
294       o << '[' << if_emit( ws, ' ' );
295       break;
296     case json::object:
297       o << '{' << if_space_or_newline( ws ) << if_indent( ws, inc_indent );
298       break;
299     default:
300       /* suppress warning */;
301   }
302   return o;
303 }
DEF_OMANIP2(serialize_begin,json::type,whitespace::type)304 DEF_OMANIP2( serialize_begin, json::type, whitespace::type )
305 
306 static ostream& serialize_end( ostream &o, json::type t, whitespace::type ws ) {
307   switch ( t ) {
308     case json::array:
309       o << if_emit( ws, ' ' ) << ']';
310       break;
311     case json::object:
312       o << if_space_or_newline( ws ) << if_indent( ws, dec_indent )
313         << if_indent( ws, indent ) << '}';
314       break;
315     default:
316       /* suppress warning */;
317   }
318   return o;
319 }
DEF_OMANIP2(serialize_end,json::type,whitespace::type)320 DEF_OMANIP2( serialize_end, json::type, whitespace::type )
321 
322 static ostream& serialize_boolean( ostream &o, zstring const &s ) {
323   assert_json_type( json::boolean, s );
324   return o << s;
325 }
DEF_OMANIP1(serialize_boolean,zstring const &)326 DEF_OMANIP1( serialize_boolean, zstring const& )
327 
328 static ostream& serialize_number( ostream &o, zstring const &s ) {
329   assert_json_type( json::number, s );
330   return o << s;
331 }
DEF_OMANIP1(serialize_number,zstring const &)332 DEF_OMANIP1( serialize_number, zstring const& )
333 
334 static ostream& serialize_string( ostream &o, zstring const &s ) {
335   ostringstream oss;
336   oss << '"' << json::serialize( s ) << '"';
337   string const temp( oss.str() );
338   assert_json_type( json::string, temp );
339   return o << temp;
340 }
341 DEF_OMANIP1( serialize_string, zstring const& )
342 
343 static ostream& serialize_children( ostream&, store::Item_t const&, json::type,
344                                     whitespace::type );
DEF_OMANIP3(serialize_children,store::Item_t const &,json::type,whitespace::type)345 DEF_OMANIP3( serialize_children, store::Item_t const&, json::type,
346              whitespace::type )
347 
348 static ostream& serialize_json_element( ostream &o,
349                                         store::Item_t const &element,
350                                         whitespace::type ws ) {
351   zstring const element_name( element->getNodeName()->getStringValue() );
352   if ( element_name != "json" )
353     throw XQUERY_EXCEPTION(
354       zerr::ZJSE0004_BAD_ELEMENT,
355       ERROR_PARAMS( element_name, "json" )
356     );
357 
358   json::type const t = get_json_type( element, false );
359 
360   return o
361     << serialize_begin( t, ws )
362     << serialize_children( element, t, ws )
363     << serialize_end( t, ws );
364 }
DEF_OMANIP2(serialize_json_element,store::Item_t const &,whitespace::type)365 DEF_OMANIP2( serialize_json_element, store::Item_t const&, whitespace::type )
366 
367 static ostream& serialize_item_element( ostream &o,
368                                         store::Item_t const &element,
369                                         whitespace::type ws ) {
370   zstring const element_name( element->getNodeName()->getStringValue() );
371   if ( element_name != "item" )
372     throw XQUERY_EXCEPTION(
373       zerr::ZJSE0005_BAD_CHILD_ELEMENT,
374       ERROR_PARAMS( element_name, "array", "item" )
375     );
376 
377   json::type const t = get_json_type( element );
378 
379   return o
380     << serialize_begin( t, ws )
381     << serialize_children( element, t, ws )
382     << serialize_end( t, ws );
383 }
DEF_OMANIP2(serialize_item_element,store::Item_t const &,whitespace::type)384 DEF_OMANIP2( serialize_item_element, store::Item_t const&, whitespace::type )
385 
386 static ostream& serialize_pair_element( ostream &o,
387                                         store::Item_t const &element,
388                                         whitespace::type ws ) {
389   zstring const element_name( element->getNodeName()->getStringValue() );
390   if ( element_name != "pair" )
391     throw XQUERY_EXCEPTION(
392       zerr::ZJSE0005_BAD_CHILD_ELEMENT,
393       ERROR_PARAMS( element_name, "object", "pair" )
394     );
395 
396   zstring name_att_value;
397   require_attribute_value( element, "name", &name_att_value );
398   json::type const t = get_json_type( element );
399 
400   return o
401     << if_indent( ws, indent ) << serialize_string( name_att_value )
402     << if_emit( ws, ' ' ) << ':' << if_emit( ws, ' ' )
403     << serialize_begin( t, ws )
404     << serialize_children( element, t, ws )
405     << serialize_end( t, ws );
406 }
DEF_OMANIP2(serialize_pair_element,store::Item_t const &,whitespace::type)407 DEF_OMANIP2( serialize_pair_element, store::Item_t const&, whitespace::type )
408 
409 static ostream& serialize_children( ostream &o, store::Item_t const &parent,
410                                     json::type parent_type,
411                                     whitespace::type ws ) {
412   if ( parent_type == json::null )
413     o << "null";
414   else {
415     oseparator sep;
416     if ( ws == whitespace::none )
417       sep.sep( "," );
418     else if ( ws == whitespace::some || parent_type == json::array )
419       sep.sep( ", " );
420     else
421       sep.sep( ",\n" );
422 
423     store::Iterator_t i = parent->getChildren();
424     i->open();
425     store::Item_t child;
426     while ( i->next( child ) ) {
427 
428       switch ( child->getNodeKind() ) {
429 
430         case store::StoreConsts::elementNode:
431           o << sep;
432           switch ( parent_type ) {
433             case json::none:
434               o << serialize_json_element( child, ws );
435               break;
436             case json::array:
437               o << serialize_item_element( child, ws );
438               break;
439             case json::object:
440               o << serialize_pair_element( child, ws );
441               break;
442             default:
443               throw XQUERY_EXCEPTION(
444                 zerr::ZJSE0006_NO_ELEMENT_CHILD,
445                 ERROR_PARAMS( json::type_string_of[ parent_type ] )
446               );
447           }
448           break;
449 
450         case store::StoreConsts::textNode:
451           o << sep;
452           switch ( parent_type ) {
453             case json::boolean:
454               o << serialize_boolean( child->getStringValue() );
455               break;
456             case json::number:
457               o << serialize_number( child->getStringValue() );
458               break;
459             case json::string:
460               o << serialize_string( child->getStringValue() );
461               break;
462             default:
463               throw XQUERY_EXCEPTION(
464                 zerr::ZJSE0007_NO_TEXT_CHILD,
465                 ERROR_PARAMS( json::type_string_of[ parent_type ] )
466               );
467           }
468           break;
469 
470         default:
471           // do nothing
472           break;
473       } // switch
474     } // while
475     i->close();
476   }
477   return o;
478 }
479 
480 ///////////////////////////////////////////////////////////////////////////////
481 
482 namespace snelson {
483 
serialize(ostream & o,store::Item_t const & item,whitespace::type ws)484 void serialize( ostream &o, store::Item_t const &item, whitespace::type ws ) {
485   switch ( item->getNodeKind() ) {
486     case store::StoreConsts::documentNode:
487       o << serialize_children( item, json::none, ws );
488       break;
489     case store::StoreConsts::elementNode:
490       o << serialize_json_element( item, ws );
491       break;
492     default:
493       throw XQUERY_EXCEPTION( zerr::ZJSE0001_NOT_DOCUMENT_OR_ELEMENT_NODE );
494   }
495 }
496 
497 } // namespace snelson
498 
499 ///////////////////////////////////////////////////////////////////////////////
500 
501 } // namespace zorba
502 /* vim:set et sw=2 ts=2: */
503