1 /////////////////////////////////////////////////////////////////////////////
2 // Name:        jsonreader.cpp
3 // Purpose:     the wxJSONReader class: a JSON text parser
4 // Author:      Luciano Cattani
5 // Created:     2007/10/14
6 // RCS-ID:      $Id: jsonreader.cpp,v 1.12 2008/03/12 10:48:19 luccat Exp $
7 // Copyright:   (c) 2007 Luciano Cattani
8 // Licence:     wxWidgets licence
9 /////////////////////////////////////////////////////////////////////////////
10 
11 #ifdef __GNUG__
12     #pragma implementation "jsonreader.cpp"
13 #endif
14 
15 #include <wx/jsonreader.h>
16 
17 #include <wx/mstream.h>
18 #include <wx/sstream.h>
19 #include <wx/debug.h>
20 #include <wx/log.h>
21 
22 
23 
24 /*! \class wxJSONReader
25  \brief The JSON parser
26 
27  The class is a JSON parser which reads a JSON formatted text and stores
28  values in the \c wxJSONValue structure.
29  The ctor accepts two parameters: the \e style flag, which controls how
30  much error-tolerant should the parser be and an integer which is
31  the maximum number of errors and warnings that have to be reported
32  (the default is 30).
33 
34  If the JSON text document does not contain an open/close JSON character the
35  function returns an \b invalid value object; in other words, the
36  wxJSONValue::IsValid() function returns FALSE.
37  This is the case of a document that is empty or contains only
38  whitespaces or comments.
 If the document contains a starting object/array character immediately
 followed by a closing object/array character
 (i.e.: \c {} ) then the function returns an \b empty array or object
 JSON value.
43  This is a valid JSON object of type wxJSONTYPE_OBJECT or wxJSONTYPE_ARRAY
44  whose wxJSONValue::Size() function returns ZERO.
45 
46  \par JSON text
47 
 The wxJSON parser just skips all characters read from the
 input JSON text until the start-object '{' or start-array '[' characters
 are encountered (see the GetStart() function).
 This means that the JSON input text may contain anything
 before the first start-object/array character except these two chars themselves
 unless they are included in a C/C++ comment.
 Comment lines that appear before the first start array/object character
 are not ignored if the parser is constructed with the wxJSONREADER_STORE_COMMENTS
 flag: they are added to the comment's array of the root JSON value.
57 
58  Note that the parsing process stops when the internal DoRead() function
59  returns. Because that function is recursive, the top-level close-object
60  '}' or close-array ']' character cause the top-level DoRead() function
61  to return thus stopping the parsing process regardless the EOF condition.
62  This means that the JSON input text may contain anything \b after
63  the top-level close-object/array character.
64  Here are some examples:
65 
66  Returns a wxJSONTYPE_INVALID value (invalid JSON value)
67  \code
68    // this text does not contain an open array/object character
69  \endcode
70 
71  Returns a wxJSONTYPE_OBJECT value of Size() = 0
72  \code
73    {
74    }
75  \endcode
76 
77  Returns a wxJSONTYPE_ARRAY value of Size() = 0
78  \code
79    [
80    ]
81  \endcode
82 
83  Text before and after the top-level open/close characters is ignored.
84  \code
85    This non-JSON text does not cause the parser to report errors or warnings
86    {
87    }
88    This non-JSON text does not cause the parser to report errors or warnings
89  \endcode
90 
91 
92  \par Extensions
93 
94  The wxJSON parser recognizes all JSON text plus some extensions
95  that are not part of the JSON syntax but that many other JSON
96  implementations do recognize.
97  If the input text contains the following non-JSON text, the parser
98  reports the situation as \e warnings and not as \e errors unless
99  the parser object was constructed with the wxJSONREADER_STRICT
100  flag. In the latter case the wxJSON parser is not tolerant.
101 
102  \li C/C++ comments: the parser recognizes C and C++ comments.
103     Comments can optionally be stored in the value they refer
104     to and can also be written back to the JSON text document.
105     To know more about comment storage see \ref wxjson_comments
106 
107  \li case tolerance: JSON syntax states that the literals \c null,
108     \c true and \c false must be lowercase; the wxJSON parser
109     also recognizes mixed case literals such as, for example,
110     \b Null or \b FaLSe.  A \e warning is emitted.
111 
 \li wrong or missing closing character: the wxJSON parser is tolerant
    about the object / array closing character. When an open-array
    character '[' is encountered, the parser expects the
    corresponding close-array character ']'. If the character
    encountered is a close-object char '}' a warning is reported.
    A warning is also reported if the character is missing when
    the end-of-file is reached.
119 
 \li multi-line strings: this feature allows a JSON string type to be
    split in two or more lines as in the standard C/C++
    languages. The drawback is that this feature is error-prone
    and you have to use it with care.
    For more info about this topic read \ref wxjson_tutorial_style_split
125 
126  Note that you can control how much error-tolerant should the parser be
127  and also you can specify how many and what extensions are recognized.
128  See the constructor's parameters for more details.
129 
130  \par Unicode vs ANSI
131 
132  The parser can read JSON text from two very different kind of objects:
133 
134  \li a string object (\b wxString)
135  \li a stream object (\b wxInputStream)
136 
 When the input is from a string object, the representation of the characters
 in the string is platform- and mode-dependent; in other words, characters are
 represented differently: in ANSI builds they depend on the charset in use
 and in Unicode builds they depend on the platform (UCS-2 on win32, UCS-4
 or UTF-8 on GNU/Linux).
142 
143  When the input is from a stream object, the only recognized encoding format
144  is UTF-8 for both ANSI and Unicode builds.
145 
146  \par Example:
147 
148  \code
149   wxJSONValue  value;
150   wxJSONReader reader;
151 
152   // open a text file that contains the UTF-8 encoded JSON text
153   wxFFileInputStream jsonText( _T("filename.utf8"), _T("r"));
154 
155   // read the file
156   int numErrors = reader.Parse( jsonText, &value );
157 
158   if ( numErrors > 0 )  {
159     ::MessageBox( _T("Error reading the input file"));
160   }
161  \endcode
162 
 Starting from version 1.1.0 the wxJSON reader and the writer have changed in
 their internal organization.
165  To know more about ANSI and Unicode mode read \ref wxjson_tutorial_unicode.
166 */
167 
168 
169 
170 // if you have the debug build of wxWidgets and wxJSON you can see
171 // trace messages by setting the:
172 // WXTRACE=traceReader StoreComment
173 // environment variable
174 static const wxChar* traceMask = _T("traceReader");
175 static const wxChar* storeTraceMask = _T("StoreComment");
176 
177 
178 //! Ctor
179 /*!
180  Construct a JSON parser object with the given parameters.
181 
182  JSON parser objects should always be constructed on the stack but
183  it does not hurt to have a global JSON parser.
184 
 \param flags this parameter controls how error-tolerant the
        parser should be
187 
188  \param maxErrors the maximum number of errors (and warnings, too) that are
189     reported by the parser. When the number of errors reaches this limit,
190     the parser stops to read the JSON input text and no other error is
191     reported.
192 
 The \c flags parameter is the combination of ZERO or more of the
 following constants OR'ed together:
195 
196  \li wxJSONREADER_ALLOW_COMMENTS: C/C++ comments are recognized by the
197      parser; a warning is reported by the parser
198  \li wxJSONREADER_STORE_COMMENTS: C/C++ comments, if recognized, are
199      stored in the value they refer to and can be rewritten back to
200      the JSON text
201  \li wxJSONREADER_CASE: the parser recognizes mixed-case literal strings
202  \li wxJSONREADER_MISSING: the parser allows missing or wrong close-object
203      and close-array characters
 \li wxJSONREADER_MULTISTRING: strings may be split in two or more
     lines
 \li wxJSONREADER_COMMENTS_AFTER: if STORE_COMMENTS is defined, the parser
     assumes that comment lines appear \b before the value they
     refer to unless this constant is specified. In the latter case,
     comments appear \b after the value they refer to.
 \li wxJSONREADER_NOUTF8_STREAM: suppress UTF-8 conversion when reading a
         string value from a stream: the reader assumes that the input stream
         is encoded in ANSI format and not in UTF-8; only meaningful in ANSI
         builds, this flag is simply ignored in Unicode builds.
214 
215  You can also use the following shortcuts to specify some predefined
216  flag's combinations:
217 
218   \li wxJSONREADER_STRICT: all wxJSON extensions are reported as errors, this
219       is the same as specifying a ZERO value as \c flags.
220   \li wxJSONREADER_TOLERANT: this is the same as ALLOW_COMMENTS | CASE |
221       MISSING | MULTISTRING; all wxJSON extensions are turned on but comments
222       are not stored in the value objects.
223 
224  \par Example:
225 
 The following code fragment constructs a JSON parser, turns on all
 wxJSON extensions and also stores C/C++ comments in the value object
 they refer to. The parser assumes that the comments appear before the
 value:
230 
231  \code
232    wxJSONReader reader( wxJSONREADER_TOLERANT | wxJSONREADER_STORE_COMMENTS );
233    wxJSONValue  root;
234    int numErrors = reader.Parse( jsonText, &root );
235  \endcode
236 */
wxJSONReader(int flags,int maxErrors)237 wxJSONReader::wxJSONReader( int flags, int maxErrors )
238 {
239     m_flags     = flags;
240     m_maxErrors = maxErrors;
241     m_noUtf8    = false;
242 #if !defined( wxJSON_USE_UNICODE )
243     // in ANSI builds we can suppress UTF-8 conversion for both the writer and the reader
244     if ( m_flags & wxJSONREADER_NOUTF8_STREAM )    {
245         m_noUtf8 = true;
246     }
247 #endif
248 
249 }
250 
251 //! Dtor - does nothing
~wxJSONReader()252 wxJSONReader::~wxJSONReader()
253 {
254 }
255 
256 //! Parse the JSON document.
257 /*!
258  The two overloaded versions of the \c Parse() function read a
259  JSON text stored in a wxString object or in a wxInputStream
260  object.
261 
262  If \c val is a NULL pointer, the function does not store the
263  values: it can be used as a JSON checker in order to check the
264  syntax of the document.
265  Returns the number of \b errors found in the document.
266  If the returned value is ZERO and the parser was constructed
267  with the \c wxJSONREADER_STRICT flag, then the parsed document
268  is \e well-formed and it only contains valid JSON text.
269 
270  If the \c wxJSONREADER_TOLERANT flag was used in the parser's
271  constructor, then a return value of ZERO
272  does not mean that the document is \e well-formed because it may
273  contain comments and other extensions that are not fatal for the
274  wxJSON parser but other parsers may fail to recognize.
275  You can use the \c GetWarningCount() function to know how many
276  wxJSON extensions are present in the JSON input text.
277 
278  Note that the JSON value object \c val is not cleared by this
279  function unless its type is of the wrong type.
280  In other words, if \c val is of type wxJSONTYPE_ARRAY and it already
281  contains 10 elements and the input document starts with a
282  '[' (open-array char) then the elements read from the document are
283  \b appended to the existing ones.
284 
285  On the other hand, if the text document starts with a '{' (open-object) char
286  then this function must change the type of the \c val object to
287  \c wxJSONTYPE_OBJECT and the old content of 10 array elements will be lost.
288 
289  \par Different input types
290 
 The real parsing process is done using UTF-8 streams. If the input is
 from a \b wxString object, the Parse function first converts the input string
 into a temporary \b wxMemoryInputStream which contains the UTF-8 conversion
 of the string itself.
 Next, the overloaded Parse function is called.
296 
 @param doc    the JSON text that has to be parsed
 @param val    the wxJSONValue object that contains the parsed text; if NULL the
         parser does not store anything but errors and warnings are reported
 @return the total number of errors encountered
301 */
302 int
Parse(const wxString & doc,wxJSONValue * val)303 wxJSONReader:: Parse( const wxString& doc, wxJSONValue* val )
304 {
305 #if !defined( wxJSON_USE_UNICODE )
306     // in ANSI builds input from a string never use UTF-8 conversion
307     bool noUtf8_bak = m_noUtf8;        // save the current setting
308     m_noUtf8 = true;
309 #endif
310 
311     // convert the string to a UTF-8 / ANSI memory stream and calls overloaded Parse()
312     char* readBuff = 0;
313     wxCharBuffer utf8CB = doc.ToUTF8();        // the UTF-8 buffer
314 #if !defined( wxJSON_USE_UNICODE )
315     wxCharBuffer ansiCB( doc.c_str());        // the ANSI buffer
316     if ( m_noUtf8 )    {
317         readBuff = ansiCB.data();
318     }
319     else    {
320         readBuff = utf8CB.data();
321     }
322 #else
323         readBuff = utf8CB.data();
324 #endif
325 
326     // now construct the temporary memory input stream
327     size_t len = strlen( readBuff );
328     wxMemoryInputStream is( readBuff, len );
329 
330     int numErr = Parse( is, val );
331 #if !defined( wxJSON_USE_UNICODE )
332     m_noUtf8 = noUtf8_bak;
333 #endif
334     return numErr;
335 }
336 
337 //! \overload Parse( const wxString&, wxJSONValue* )
338 int
Parse(wxInputStream & is,wxJSONValue * val)339 wxJSONReader::Parse( wxInputStream& is, wxJSONValue* val )
340 {
341     // if val == 0 the 'temp' JSON value will be passed to DoRead()
342     wxJSONValue temp;
343     m_level    = 0;
344     m_depth    = 0;
345     m_lineNo   = 1;
346     m_colNo    = 1;
347     m_peekChar = -1;
348     m_errors.clear();
349     m_warnings.clear();
350 
351     // if a wxJSONValue is not passed to the Parse function
352     // we set the temparary object created on the stack
353     // I know this will slow down the validation of input
354     if ( val == 0 )  {
355         val = &temp;
356     }
357     wxASSERT( val );
358 
359     // set the wxJSONValue object's pointers for comment storage
360     m_next       = val;
361     m_next->SetLineNo( -1 );
362     m_lastStored = 0;
363     m_current    = 0;
364 
365     int ch = GetStart( is );
366     switch ( ch )  {
367         case '{' :
368         val->SetType( wxJSONTYPE_OBJECT );
369         break;
370     case '[' :
371         val->SetType( wxJSONTYPE_ARRAY );
372         break;
373     default :
374         AddError( _T("Cannot find a start object/array character" ));
375         return m_errors.size();
376         break;
377     }
378 
379     // returning from DoRead() could be for EOF or for
380     // the closing array-object character
381     // if -1 is returned, it is as an error because the lack
382     // of close-object/array characters
383     // note that the missing close-chars error messages are
384     // added by the DoRead() function
385     /*ch =*/ DoRead( is, *val );
386     return m_errors.size();
387 }
388 
389 
390 //! Returns the start of the document
391 /*!
392  This is the first function called by the Parse() function and it searches
393  the input stream for the starting character of a JSON text and returns it.
394  JSON text start with '{' or '['.
395  If the two starting characters are inside a C/C++ comment, they
396  are ignored.
397  Returns the JSON-text start character or -1 on EOF.
398 
399  @param is    the input stream that contains the JSON text
400  @return -1 on errors or EOF; one of '{' or '['
401 */
402 int
GetStart(wxInputStream & is)403 wxJSONReader::GetStart( wxInputStream& is )
404 {
405     int ch = 0;
406     do  {
407         switch ( ch )  {
408             case 0 :
409                 ch = ReadChar( is );
410                 break;
411             case '{' :
412                 return ch;
413                 break;
414             case '[' :
415                 return ch;
416                 break;
417             case '/' :
418                 ch = SkipComment( is );
419                 StoreComment( 0 );
420                 break;
421             default :
422                 ch = ReadChar( is );
423                 break;
424         }
425     } while ( ch >= 0 );
426     return ch;
427 }
428 
429 //! Return a reference to the error message's array.
430 const wxArrayString&
GetErrors() const431 wxJSONReader::GetErrors() const
432 {
433     return m_errors;
434 }
435 
436 //! Return a reference to the warning message's array.
437 const wxArrayString&
GetWarnings() const438 wxJSONReader::GetWarnings() const
439 {
440     return m_warnings;
441 }
442 
443 //! Return the depth of the JSON input text
444 /*!
445  The function returns the number of times the recursive \c DoRead function was
446  called in the parsing process thus returning the maximum depth of the JSON
447  input text.
448 */
449 int
GetDepth() const450 wxJSONReader::GetDepth() const
451 {
452     return m_depth;
453 }
454 
455 
456 
457 //! Return the size of the error message's array.
458 int
GetErrorCount() const459 wxJSONReader::GetErrorCount() const
460 {
461     return m_errors.size();
462 }
463 
464 //! Return the size of the warning message's array.
465 int
GetWarningCount() const466 wxJSONReader::GetWarningCount() const
467 {
468     return m_warnings.size();
469 }
470 
471 
472 //! Read a character from the input JSON document.
473 /*!
474  The function returns the next byte from the UTF-8 stream as an INT.
475  In case of errors or EOF, the function returns -1.
476  The function also updates the \c m_lineNo and \c m_colNo data
477  members and converts all CR+LF sequence in LF.
478 
479  This function only returns one byte UTF-8 (one code unit)
480  at a time and not Unicode code points.
481  The only reason for this function is to process line and column
482  numbers.
483 
484  @param is    the input stream that contains the JSON text
485  @return the next char (one single byte) in the input stream or -1 on error or EOF
486 */
487 int
ReadChar(wxInputStream & is)488 wxJSONReader::ReadChar( wxInputStream& is )
489 {
490     if ( is.Eof())    {
491         return -1;
492     }
493 
494     unsigned char ch = is.GetC();
495     size_t last = is.LastRead();    // returns ZERO if EOF
496     if ( last == 0 )    {
497         return -1;
498     }
499 
500     // the function also converts CR in LF. only LF is returned
501     // in the case of CR+LF
502     int nextChar;
503 
504     if ( ch == '\r' )  {
505         m_colNo = 1;
506         nextChar = PeekChar( is );
507         if ( nextChar == -1 )  {
508             return -1;
509         }
510         else if ( nextChar == '\n' )    {
511             ch = is.GetC();
512         }
513     }
514     if ( ch == '\n' )  {
515         ++m_lineNo;
516         m_colNo = 1;
517     }
518     else  {
519         ++m_colNo;
520     }
521     return (int) ch;
522 }
523 
524 
525 //! Peek a character from the input JSON document
526 /*!
527  This function just calls the \b Peek() function on the stream
528  and returns it.
529 
530  @param is    the input stream that contains the JSON text
531  @return the next char (one single byte) in the input stream or -1 on error or EOF
532 */
533 int
PeekChar(wxInputStream & is)534 wxJSONReader::PeekChar( wxInputStream& is )
535 {
536     int ch = -1; unsigned char c;
537     if ( !is.Eof())    {
538         c = is.Peek();
539         ch = c;
540     }
541     return ch;
542 }
543 
544 
545 //! Reads the JSON text document (internal use)
546 /*!
 This is a recursive function that is called by \c Parse()
 and by the \c DoRead() function itself when a new object /
 array character is encountered.
 The function returns when an EOF condition is encountered or
 when the corresponding close-object / close-array char is encountered.
552  The function also increments the \c m_level
553  data member when it is entered and decrements it on return.
554  It also sets \c m_depth equal to \c m_level if \c m_depth is
555  less than \c m_level.
556 
557  The function is the heart of the wxJSON parser class but it is
558  also very easy to understand because JSON syntax is very
559  easy.
560 
561  Returns the last close-object/array character read or -1 on EOF.
562 
563  @param is    the input stream that contains the JSON text
564  @param parent the JSON value object that is the parent of all subobjects
565          read by the function until the next close-object/array (for
566          the top-level \c DoRead function \c parent is the root JSON object)
567  @return one of close-array or close-object char or -1 on error or EOF
568 */
569 int
DoRead(wxInputStream & is,wxJSONValue & parent)570 wxJSONReader::DoRead( wxInputStream& is, wxJSONValue& parent )
571 {
572     ++m_level;
573     if ( m_depth < m_level )    {
574         m_depth = m_level;
575     }
576 
577     // 'value' is the wxJSONValue structure that has to be
578     // read. Data read from the JSON text input is stored
579     // in the following object.
580     wxJSONValue value( wxJSONTYPE_INVALID );
581 
582     // sets the pointers to the current, next and last-stored objects
583     // in order to determine the value to which a comment refers to
584     m_next = &value;
585     m_current = &parent;
586     m_current->SetLineNo( m_lineNo );
587     m_lastStored = 0;
588 
589     // the 'key' string is stored from 'value' when a ':' is encontered
590     wxString  key;
591 
592     // the character read: -1=EOF, 0=to be read
593     int ch=0;
594 
595     do {                   // we read until ch < 0
596         switch ( ch )  {
597             case 0 :
598                 ch = ReadChar( is );
599                 break;
600             case ' ' :
601             case '\t' :
602             case '\n' :
603             case '\r' :
604                 ch = SkipWhiteSpace( is );
605                 break;
606             case -1 :   // the EOF
607                 break;
608             case '/' :
609                 ch = SkipComment( is );
610                 StoreComment( &parent );
611                 break;
612 
613             case '{' :
614                 if ( parent.IsObject() ) {
615                     if ( key.empty() )   {
616                         AddError( _T("\'{\' is not allowed here (\'name\' is missing") );
617                     }
618                     if ( value.IsValid() )   {
619                         AddError( _T("\'{\' cannot follow a \'value\'") );
620                           }
621                 }
622                 else if ( parent.IsArray() )  {
623                     if ( value.IsValid() )   {
624                         AddError( _T("\'{\' cannot follow a \'value\' in JSON array") );
625                     }
626                 }
627                 else  {
628                     wxJSON_ASSERT( 0 );       // always fails
629                 }
630 
631                 // the openobject char cause the DoRead() to be called recursively
632                 value.SetType( wxJSONTYPE_OBJECT );
633                 ch = DoRead( is, value );
634                 break;
635 
636             case '}' :
637                 if ( !parent.IsObject() )  {
638                     AddWarning( wxJSONREADER_MISSING,
639                     _T("Trying to close an array using the \'}\' (close-object) char" ));
640                 }
641                 // close-object: store the current value, if any
642                 StoreValue( ch, key, value, parent );
643                 m_current = &parent;
644                 m_next    = 0;
645                 m_current->SetLineNo( m_lineNo );
646                 ch = ReadChar( is );
647                 return ch;
648                 break;
649 
650             case '[' :
651                 if ( parent.IsObject() ) {
652                     if ( key.empty() )   {
653                         AddError( _T("\'[\' is not allowed here (\'name\' is missing") );
654                     }
655                     if ( value.IsValid() )   {
656                         AddError( _T("\'[\' cannot follow a \'value\' text") );
657                     }
658                 }
659                 else if ( parent.IsArray())  {
660                     if ( value.IsValid() )   {
661                         AddError( _T("\'[\' cannot follow a \'value\'") );
662                     }
663                 }
664                 else  {
665                     wxJSON_ASSERT( 0 );       // always fails
666                 }
667                 // open-array cause the DoRead() to be called recursively
668                 value.SetType( wxJSONTYPE_ARRAY );
669                 ch = DoRead( is, value );
670                 break;
671 
672             case ']' :
673                 if ( !parent.IsArray() )  {
674                     // wrong close-array char (should be close-object)
675                     AddWarning( wxJSONREADER_MISSING,
676                     _T("Trying to close an object using the \']\' (close-array) char" ));
677                 }
678                 StoreValue( ch, key, value, parent );
679                 m_current = &parent;
680                 m_next    = 0;
681                 m_current->SetLineNo( m_lineNo );
682                 return 0;   // returning ZERO for reading the next char
683                 break;
684 
685             case ',' :
686                 // store the value, if any
687                 StoreValue( ch, key, value, parent );
688                 key.clear();
689                 ch = ReadChar( is );
690                 break;
691 
692             case '\"' :
693                 ch = ReadString( is, value );     // read a JSON string type
694                 m_current = &value;
695                 m_next    = 0;
696                 break;
697 
698             case '\'' :
699                 ch = ReadMemoryBuff( is, value );  // read a memory buffer type
700                 m_current = &value;
701                 m_next    = 0;
702                 break;
703 
704             case ':' :   // key / value separator
705                 m_current = &value;
706                 m_current->SetLineNo( m_lineNo );
707                 m_next    = 0;
708                 if ( !parent.IsObject() )  {
709                     AddError( _T( "\':\' can only used in object's values" ));
710                 }
711                 else if ( !value.IsString() )  {
712                     AddError( _T( "\':\' follows a value which is not of type \'string\'" ));
713                 }
714                 else if ( !key.empty() )  {
715                     AddError( _T( "\':\' not allowed where a \'name\' string was already available" ));
716                 }
717                 else  {
718                     // the string in 'value' is set as the 'key'
719                     key = value.AsString();
720                     value.SetType( wxJSONTYPE_INVALID );
721                 }
722                 ch = ReadChar( is );
723                 break;
724 
725             default :
726                 // no special char: it is a literal or a number
727                 // errors are checked in the 'ReadValue()' function.
728                 m_current = &value;
729                 m_current->SetLineNo( m_lineNo );
730                 m_next    = 0;
731                 ch = ReadValue( is, ch, value );
732                 break;
733         } // end switch
734     } while ( ch >= 0 );
735 
736     // the DoRead() should return when the close-object/array char is encontered
737     // if we are here, the EOF condition was encontered so one or more close-something
738     // characters are missing
739     if ( parent.IsArray() )  {
740         AddWarning( wxJSONREADER_MISSING, _T("\']\' missing at end of file"));
741     }
742     else if ( parent.IsObject() )  {
743         AddWarning( wxJSONREADER_MISSING, _T("\'}\' missing at end of file"));
744     }
745     else  {
746         wxJSON_ASSERT( 0 );
747     }
748 
749     // we store the value, as there is a missing close-object/array char
750     StoreValue( ch, key, value, parent );
751 
752     --m_level;
753     return ch;
754 }
755 
756 //! Store a value in the parent object.
757 /*!
 The function is called by \c DoRead() when a comma
 or a close-object/array character is encountered and stores the current
 value read by the parser in the parent object.
 The function checks that \c value is not invalid and that \c key is
 not an empty string if \c parent is an object.

 \param ch    the character read: a comma or close object/array char
765  \param key    the \b key string: must be empty if \c parent is an array
766  \param value    the current JSON value to be stored in \c parent
767  \param parent    the JSON value that is the parent of \c value.
768  \return none
769 */
770 void
StoreValue(int ch,const wxString & key,wxJSONValue & value,wxJSONValue & parent)771 wxJSONReader::StoreValue( int ch, const wxString& key, wxJSONValue& value, wxJSONValue& parent )
772 {
773     // if 'ch' == } or ] than value AND key may be empty when a open object/array
774     // is immediatly followed by a close object/array
775     //
776     // if 'ch' == , (comma) value AND key (for TypeMap) cannot be empty
777     //
778     wxLogTrace( traceMask, _T("(%s) ch=%d char=%c"), __PRETTY_FUNCTION__, ch, (char) ch);
779     wxLogTrace( traceMask, _T("(%s) value=%s"), __PRETTY_FUNCTION__, value.AsString().c_str());
780 
781     m_current = 0;
782     m_next    = &value;
783     m_lastStored = 0;
784     m_next->SetLineNo( -1 );
785 
786     if ( !value.IsValid() && key.empty() ) {
787         // OK, if the char read is a close-object or close-array
788         if ( ch == '}' || ch == ']' )  {
789             m_lastStored = 0;
790             wxLogTrace( traceMask, _T("(%s) key and value are empty, returning"),
791                              __PRETTY_FUNCTION__);
792         }
793         else  {
794             AddError( _T("key or value is missing for JSON value"));
795         }
796     }
797     else  {
798         // key or value are not empty
799         if ( parent.IsObject() )  {
800             if ( !value.IsValid() ) {
801                 AddError( _T("cannot store the value: \'value\' is missing for JSON object type"));
802              }
803              else if ( key.empty() ) {
804                 AddError( _T("cannot store the value: \'key\' is missing for JSON object type"));
805             }
806             else  {
807                 // OK, adding the value to parent key/value map
808                 wxLogTrace( traceMask, _T("(%s) adding value to key:%s"),
809                      __PRETTY_FUNCTION__, key.c_str());
810                 parent[key] = value;
811                 m_lastStored = &(parent[key]);
812                 m_lastStored->SetLineNo( m_lineNo );
813             }
814         }
815         else if ( parent.IsArray() ) {
816             if ( !value.IsValid() ) {
817                     AddError( _T("cannot store the item: \'value\' is missing for JSON array type"));
818             }
819             if ( !key.empty() ) {
820                 AddError( _T("cannot store the item: \'key\' (\'%s\') is not permitted in JSON array type"), key);
821             }
822             wxLogTrace( traceMask, _T("(%s) appending value to parent array"),
823                                  __PRETTY_FUNCTION__ );
824             parent.Append( value );
825             const wxJSONInternalArray* arr = parent.AsArray();
826             wxJSON_ASSERT( arr );
827             m_lastStored = &(arr->Last());
828             m_lastStored->SetLineNo( m_lineNo );
829         }
830         else  {
831             wxJSON_ASSERT( 0 );  // should never happen
832         }
833     }
834     value.SetType( wxJSONTYPE_INVALID );
835     value.ClearComments();
836 }
837 
838 //! Add a error message to the error's array
839 /*!
840  The overloaded versions of this function add an error message to the
841  error's array stored in \c m_errors.
842  The error message is formatted as follows:
843 
844  \code
845    Error: line xxx, col xxx - <error_description>
846  \endcode
847 
848  The \c msg parameter is the description of the error; line's and column's
849  number are automatically added by the functions.
850  The \c fmt parameter is a format string that has the same syntax as the \b printf
851  function.
852  Note that it is the user's responsability to provide a format string suitable
853  with the arguments: another string or a character.
854 */
855 void
AddError(const wxString & msg)856 wxJSONReader::AddError( const wxString& msg )
857 {
858     wxString err;
859     err.Printf( _T("Error: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str() );
860 
861     wxLogTrace( traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
862 
863     if ( (int) m_errors.size() < m_maxErrors )  {
864         m_errors.Add( err );
865     }
866     else if ( (int) m_errors.size() == m_maxErrors )  {
867         m_errors.Add( _T("ERROR: too many error messages - ignoring further errors"));
868     }
869     // else if ( m_errors > m_maxErrors ) do nothing, thus ignore the error message
870 }
871 
872 //! \overload AddError( const wxString& )
873 void
AddError(const wxString & fmt,const wxString & str)874 wxJSONReader::AddError( const wxString& fmt, const wxString& str )
875 {
876     wxString s;
877     s.Printf( fmt.c_str(), str.c_str() );
878     AddError( s );
879 }
880 
881 //! \overload AddError( const wxString& )
882 void
AddError(const wxString & fmt,wxChar c)883 wxJSONReader::AddError( const wxString& fmt, wxChar c )
884 {
885     wxString s;
886     s.Printf( fmt.c_str(), c );
887     AddError( s );
888 }
889 
890 //! Add a warning message to the warning's array
891 /*!
892  The warning description is as follows:
893  \code
894    Warning: line xxx, col xxx - <warning_description>
895  \endcode
896 
897  Warning messages are generated by the parser when the JSON
898  text that has been read is not well-formed but the
899  error is not fatal and the parser recognizes the text
900  as an extension to the JSON standard (see the parser's ctor
901  for more info about wxJSON extensions).
902 
903  Note that the parser has to be constructed with a flag that
904  indicates if each individual wxJSON extension is on.
905  If the warning message is related to an extension that is not
906  enabled in the parser's \c m_flag data member, this function
907  calls AddError() and the warning message becomes an error
908  message.
909  The \c type parameter is one of the same constants that
910  specify the parser's extensions.
911  If type is ZERO than the function always adds a warning
912 */
913 void
AddWarning(int type,const wxString & msg)914 wxJSONReader::AddWarning( int type, const wxString& msg )
915 {
916     // if 'type' AND 'm_flags' == 1 than the extension is
917     // ON. Otherwise it is OFF anf the function calls AddError()
918     if ( type != 0 )    {
919         if ( ( type & m_flags ) == 0 )  {
920             AddError( msg );
921             return;
922         }
923     }
924 
925     wxString err;
926     err.Printf( _T( "Warning: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str() );
927 
928     wxLogTrace( traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
929     if ( (int) m_warnings.size() < m_maxErrors )  {
930         m_warnings.Add( err );
931     }
932     else if ( (int) m_warnings.size() == m_maxErrors )  {
933         m_warnings.Add( _T("Error: too many warning messages - ignoring further warnings"));
934     }
935     // else do nothing, thus ignore the warning message
936 }
937 
938 //! Skip all whitespaces.
939 /*!
940  The function reads characters from the input text
941  and returns the first non-whitespace character read or -1
942  if EOF.
943  Note that the function does not rely on the \b isspace function
944  of the C library but checks the space constants: space, TAB and
945  LF.
946 */
947 int
SkipWhiteSpace(wxInputStream & is)948 wxJSONReader::SkipWhiteSpace( wxInputStream& is )
949 {
950     // just read one byte at a time and check for whitespaces
951     int ch;
952     do {
953         ch = ReadChar( is );
954         if ( ch < 0 )  {
955             break;
956         }
957     }
958     while ( ch == ' ' || ch == '\n' || ch == '\t' );
959     wxLogTrace( traceMask, _T("(%s) end whitespaces line=%d col=%d"),
960              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
961     return ch;
962 }
963 
964 //! Skip a comment
965 /*!
966  The function is called by DoRead() when a '/' (slash) character
967  is read from the input stream assuming that a C/C++ comment is starting.
968  Returns the first character that follows the comment or
969  -1 on EOF.
970  The function also adds a warning message because comments are not
971  valid JSON text.
972  The function also stores the comment, if any, in the \c m_comment data
973  member: it can be used by the DoRead() function if comments have to be
974  stored in the value they refer to.
975 */
976 int
SkipComment(wxInputStream & is)977 wxJSONReader::SkipComment( wxInputStream& is )
978 {
979     static const wxChar* warn =
980     _T("Comments may be tolerated in JSON text but they are not part of JSON syntax");
981 
982     // if it is a comment, then a warning is added to the array
983     // otherwise it is an error: values cannot start with a '/'
984     // read the char next to the first slash
985     int ch = ReadChar( is );
986     if ( ch < 0 )  {
987         return -1;
988     }
989 
990     wxLogTrace( storeTraceMask, _T("(%s) start comment line=%d col=%d"),
991              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
992 
993     // the temporary UTF-8/ANSI buffer that holds the comment string. This will be
994     // converted to a wxString object using wxString::FromUTF8() or From8BitData()
995     wxMemoryBuffer utf8Buff;
996     unsigned char c;
997 
998     if ( ch == '/' )  {         // C++ comment, read until end-of-line
999         // C++ comment strings are in UTF-8 format. we store all
1000         // UTF-8 code units until the first LF or CR+LF
1001         AddWarning( wxJSONREADER_ALLOW_COMMENTS, warn );
1002         m_commentLine = m_lineNo;
1003         utf8Buff.AppendData( "//", 2 );
1004 
1005         while ( ch >= 0 )  {
1006             if ( ch == '\n' )    {
1007                 break;
1008             }
1009             if ( ch == '\r' )    {
1010                 ch = PeekChar( is );
1011                 if ( ch == '\n' )    {
1012                     ch = ReadChar( is );
1013                 }
1014                 break;
1015             }
1016             else    {
1017                 // store the char in the UTF8 temporary buffer
1018                 c = (unsigned char) ch;
1019                 utf8Buff.AppendByte( c );
1020             }
1021             ch = ReadChar( is );
1022         }
1023         // now convert the temporary UTF-8 buffer
1024         m_comment = wxString::FromUTF8( (const char*) utf8Buff.GetData(),
1025                         utf8Buff.GetDataLen());
1026     }
1027 
1028     // check if a C-style comment
1029     else if ( ch == '*' )  {     // C-style comment
1030         AddWarning(wxJSONREADER_ALLOW_COMMENTS, warn );
1031         m_commentLine = m_lineNo;
1032         utf8Buff.AppendData( "/*", 2 );
1033         while ( ch >= 0 ) {
1034             // check the END-COMMENT chars ('*/')
1035             if ( ch == '*' )    {
1036                 ch = PeekChar( is );
1037                 if ( ch == '/' )    {
1038                     ch = ReadChar( is );  // read the '/' char
1039                     ch = ReadChar( is );  // read the next char that will be returned
1040                     utf8Buff.AppendData( "*/", 2 );
1041                     break;
1042                 }
1043             }
1044             // store the char in the UTF8 temporary buffer
1045             c = (unsigned char) ch;
1046             utf8Buff.AppendByte( c );
1047             ch = ReadChar( is );
1048         }
1049         // now convert the temporary buffer in a wxString object
1050         if ( m_noUtf8 )    {
1051             m_comment = wxString::From8BitData( (const char*) utf8Buff.GetData(),
1052                                 utf8Buff.GetDataLen());
1053         }
1054         else    {
1055             m_comment = wxString::FromUTF8( (const char*) utf8Buff.GetData(),
1056                                 utf8Buff.GetDataLen());
1057         }
1058     }
1059 
1060     else  {   // it is not a comment, return the character next the first '/'
1061         AddError( _T( "Strange '/' (did you want to insert a comment?)"));
1062         // we read until end-of-line OR end of C-style comment OR EOF
1063         // because a '/' should be a start comment
1064         while ( ch >= 0 ) {
1065             ch = ReadChar( is );
1066             if ( ch == '*' && PeekChar( is ) == '/' )  {
1067                 break;
1068             }
1069             if ( ch == '\n' )  {
1070                 break;
1071             }
1072         }
1073         // read the next char that will be returned
1074         ch = ReadChar( is );
1075     }
1076     wxLogTrace( traceMask, _T("(%s) end comment line=%d col=%d"),
1077              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1078     wxLogTrace( storeTraceMask, _T("(%s) end comment line=%d col=%d"),
1079              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1080     wxLogTrace( storeTraceMask, _T("(%s) comment=%s"),
1081              __PRETTY_FUNCTION__, m_comment.c_str());
1082     return ch;
1083 }
1084 
1085 //! Read a string value
1086 /*!
1087  The function reads a string value from input stream and it is
1088  called by the \c DoRead() function when it enconters the
1089  double quote characters.
1090  The function read all bytes up to the next double quotes
1091  (unless it is escaped) and stores them in a temporary UTF-8
1092  memory buffer.
1093  Also, the function processes the escaped characters defined
1094  in the JSON syntax.
1095 
1096  Next, the function tries to convert the UTF-8 buffer to a
1097  \b wxString object using the \b wxString::FromUTF8 function.
1098  Depending on the build mode, we can have the following:
1099  \li in Unicode the function always succeeds, provided that the
1100     buffer contains valid UTF-8 code units.
1101 
1102  \li in ANSI builds the conversion may fail because of the presence of
1103     unrepresentable characters in the current locale. In this case,
1104     the default behaviour is to perform a char-by-char conversion; every
1105     char that cannot be represented in the current locale is stored as
1106     \e unicode \e escaped \e sequence
1107 
1108  \li in ANSI builds, if the reader is constructed with the wxJSONREADER_NOUTF8_STREAM
1109      then no conversion takes place and the UTF-8 temporary buffer is simply
1110      \b copied to the \b wxString object
1111 
1112  The string is, finally, stored in the provided wxJSONValue argument
1113  provided that it is empty or it contains a string value.
1114  This is because the parser class recognizes multi-line strings
1115  like the following one:
1116  \code
1117    [
1118       "This is a very long string value which is splitted into more"
1119       "than one line because it is more human readable"
1120    ]
1121  \endcode
1122  Because of the lack of the value separator (,) the parser
1123  assumes that the string was splitted into several double-quoted
1124  strings.
1125  If the value does not contain a string then an error is
1126  reported.
1127  Splitted strings cause the parser to report a warning.
1128 */
1129 int
ReadString(wxInputStream & is,wxJSONValue & val)1130 wxJSONReader::ReadString( wxInputStream& is, wxJSONValue& val )
1131 {
1132     // the char last read is the opening qoutes (")
1133 
1134     wxMemoryBuffer utf8Buff;
1135     char ues[8];        // stores a Unicode Escaped Esquence: \uXXXX
1136 
1137     int ch = 0;
1138     while ( ch >= 0 ) {
1139         ch = ReadChar( is );
1140         unsigned char c = (unsigned char) ch;
1141         if ( ch == '\\' )  {    // an escape sequence
1142             ch = ReadChar( is );
1143             switch ( ch )  {
1144                 case -1 :        // EOF
1145                     break;
1146                 case 't' :
1147                     utf8Buff.AppendByte( '\t' );
1148                     break;
1149                 case 'n' :
1150                     utf8Buff.AppendByte( '\n' );
1151                     break;
1152                 case 'b' :
1153                     utf8Buff.AppendByte( '\b' );
1154                     break;
1155                 case 'r' :
1156                     utf8Buff.AppendByte( '\r' );
1157                     break;
1158                 case '\"' :
1159                     utf8Buff.AppendByte( '\"' );
1160                     break;
1161                 case '\\' :
1162                     utf8Buff.AppendByte( '\\' );
1163                     break;
1164                 case '/' :
1165                     utf8Buff.AppendByte( '/' );
1166                     break;
1167                 case 'f' :
1168                     utf8Buff.AppendByte( '\f' );
1169                     break;
1170                 case 'u' :
1171                     ch = ReadUES( is, ues );
1172                     if ( ch < 0 ) {        // if EOF, returns
1173                         return ch;
1174                     }
1175                     // append the escaped character to the UTF8 buffer
1176                     AppendUES( utf8Buff, ues );
1177                     // many thanks to Bryan Ashby who discovered this bug
1178                     continue;
1179                     // break;
1180                 default :
1181                     AddError( _T( "Unknow escaped character \'\\%c\'"), ch );
1182             }
1183         }
1184         else {
1185             // we have read a non-escaped character so we have to append it to
1186             // the temporary UTF-8 buffer until the next quote char
1187             if ( ch == '\"' )    {
1188                 break;
1189             }
1190             utf8Buff.AppendByte( c );
1191         }
1192     }
1193 
1194     // if UTF-8 conversion is disabled (ANSI builds only) we just copy the
1195     // bit data to a wxString object
1196     wxString s;
1197     if ( m_noUtf8 )    {
1198         s = wxString::From8BitData( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1199     }
1200     else    {
1201         // perform UTF-8 conversion
1202         // first we check that the UTF-8 buffer is correct, i.e. it contains valid
1203         // UTF-8 code points.
1204         // this works in both ANSI and Unicode builds.
1205         size_t convLen = wxConvUTF8.ToWChar( 0,        // wchar_t destination
1206                         0,                            // size_t  destLenght
1207             (const char*) utf8Buff.GetData(),        // char_t  source
1208                 utf8Buff.GetDataLen());                // size_t  sourceLenght
1209 
1210         if ( convLen == wxCONV_FAILED )    {
1211             AddError( _T( "String value: the UTF-8 stream is invalid"));
1212             s.append( _T( "<UTF-8 stream not valid>"));
1213         }
1214         else    {
1215 #if defined( wxJSON_USE_UNICODE )
1216             // in Unicode just convert to wxString
1217             s = wxString::FromUTF8( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1218 #else
1219             // in ANSI, the conversion may fail and an empty string is returned
1220             // in this case, the reader do a char-by-char conversion storing
1221               // unicode escaped sequences of unrepresentable characters
1222             s = wxString::FromUTF8( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1223             if ( s.IsEmpty() )    {
1224                 int r = ConvertCharByChar( s, utf8Buff );    // return number of escaped sequences
1225                 if ( r > 0 )    {
1226                     AddWarning( 0, _T( "The string value contains unrepresentable Unicode characters"));
1227                 }
1228             }
1229 #endif
1230         }
1231      }
1232     wxLogTrace( traceMask, _T("(%s) line=%d col=%d"),
1233              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1234     wxLogTrace( traceMask, _T("(%s) string read=%s"),
1235              __PRETTY_FUNCTION__, s.c_str() );
1236     wxLogTrace( traceMask, _T("(%s) value=%s"),
1237              __PRETTY_FUNCTION__, val.AsString().c_str() );
1238 
1239     // now assign the string to the JSON-value 'value'
1240     // must check that:
1241     //   'value'  is empty
1242     //   'value'  is a string; concatenate it but emit warning
1243     if ( !val.IsValid() )   {
1244         wxLogTrace( traceMask, _T("(%s) assigning the string to value"), __PRETTY_FUNCTION__ );
1245         val = s ;
1246     }
1247     else if ( val.IsString() )  {
1248         AddWarning( wxJSONREADER_MULTISTRING,
1249             _T("Multiline strings are not allowed by JSON syntax") );
1250         wxLogTrace( traceMask, _T("(%s) concatenate the string to value"), __PRETTY_FUNCTION__ );
1251         val.Cat( s );
1252     }
1253     else  {
1254         AddError( _T( "String value \'%s\' cannot follow another value"), s );
1255     }
1256 
1257     // store the input text's line number when the string was stored in 'val'
1258     val.SetLineNo( m_lineNo );
1259 
1260     // read the next char after the closing quotes and returns it
1261     if ( ch >= 0 )  {
1262         ch = ReadChar( is );
1263     }
1264     return ch;
1265 }
1266 
1267 //! Reads a token string
1268 /*!
1269  This function is called by the ReadValue() when the
1270  first character encontered is not a special char
1271  and it is not a double-quote.
1272  The only possible type is a literal or a number which
1273  all lies in the US-ASCII charset so their UTF-8 encodeing
1274  is the same as US-ASCII.
1275  The function simply reads one byte at a time from the stream
1276  and appends them to a \b wxString object.
1277  Returns the next character read.
1278 
1279  A token cannot include \e unicode \e escaped \e sequences
1280  so this function does not try to interpret such sequences.
1281 
1282  @param is    the input stream
1283  @param ch    the character read by DoRead
1284  @param s    the string object that contains the token read
1285  @return -1 in case of errors or EOF
1286 */
1287 int
ReadToken(wxInputStream & is,int ch,wxString & s)1288 wxJSONReader::ReadToken( wxInputStream& is, int ch, wxString& s )
1289 {
1290     int nextCh = ch;
1291     while ( nextCh >= 0 ) {
1292         switch ( nextCh ) {
1293             case ' ' :
1294             case ',' :
1295             case ':' :
1296             case '[' :
1297             case ']' :
1298             case '{' :
1299             case '}' :
1300             case '\t' :
1301             case '\n' :
1302             case '\r' :
1303             case '\b' :
1304                 wxLogTrace( traceMask, _T("(%s) line=%d col=%d"),
1305                      __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1306                 wxLogTrace( traceMask, _T("(%s) token read=%s"),
1307                      __PRETTY_FUNCTION__, s.c_str() );
1308                 return nextCh;
1309                 break;
1310             default :
1311                 s.Append( (unsigned char) nextCh, 1 );
1312                 break;
1313         }
1314         // read the next character
1315         nextCh = ReadChar( is );
1316     }
1317     wxLogTrace( traceMask, _T("(%s) EOF on line=%d col=%d"),
1318          __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1319     wxLogTrace( traceMask, _T("(%s) EOF - token read=%s"),
1320              __PRETTY_FUNCTION__, s.c_str() );
1321     return nextCh;
1322 }
1323 
1324 //! Read a value from input stream
1325 /*!
1326  The function is called by DoRead() when it enconters a char that is
1327  not a special char nor a double-quote.
1328  It assumes that the string is a numeric value or a literal
1329  boolean value and stores it in the wxJSONValue object \c val.
1330 
1331  The function also checks that \c val is of type wxJSONTYPE_INVALID otherwise
1332  an error is reported becasue a value cannot follow another value:
1333  maybe a (,) or (:) is missing.
1334 
1335  If the literal starts with a digit, a plus or minus sign, the function
1336  tries to interpret it as a number. The following are tried by the function,
1337  in this order:
1338 
1339  \li if the literal starts with a digit: signed integer, then unsigned integer
1340         and finally double conversion is tried
1341  \li if the literal starts with a minus sign: signed integer, then  double
1342         conversion is tried
1343  \li if the literal starts with plus sign: unsigned integer
1344         then double conversion is tried
1345 
1346  Returns the next character or -1 on EOF.
1347 */
1348 int
ReadValue(wxInputStream & is,int ch,wxJSONValue & val)1349 wxJSONReader::ReadValue( wxInputStream& is, int ch, wxJSONValue& val )
1350 {
1351     wxString s;
1352     int nextCh = ReadToken( is, ch, s );
1353     wxLogTrace( traceMask, _T("(%s) value=%s"),
1354              __PRETTY_FUNCTION__, val.AsString().c_str() );
1355 
1356     if ( val.IsValid() )  {
1357         AddError( _T( "Value \'%s\' cannot follow a value: \',\' or \':\' missing?"), s );
1358         return nextCh;
1359     }
1360 
1361     // variables used for converting numeric values
1362     bool r;  double d;
1363 #if defined( wxJSON_64BIT_INT )
1364     wxInt64  i64;
1365     wxUint64 ui64;
1366 #else
1367     unsigned long int ul; long int l;
1368 #endif
1369 
1370     // first try the literal strings lowercase and nocase
1371     if ( s == _T("null") ) {
1372         val.SetType( wxJSONTYPE_NULL );
1373         wxLogTrace( traceMask, _T("(%s) value = NULL"),  __PRETTY_FUNCTION__ );
1374         return nextCh;
1375     }
1376     else if ( s.CmpNoCase( _T( "null" )) == 0 ) {
1377         wxLogTrace( traceMask, _T("(%s) value = NULL"),  __PRETTY_FUNCTION__ );
1378         AddWarning( wxJSONREADER_CASE, _T( "the \'null\' literal must be lowercase" ));
1379         val.SetType( wxJSONTYPE_NULL );
1380         return nextCh;
1381     }
1382     else if ( s == _T("true") ) {
1383         wxLogTrace( traceMask, _T("(%s) value = TRUE"),  __PRETTY_FUNCTION__ );
1384         val = true;
1385         return nextCh;
1386     }
1387     else if ( s.CmpNoCase( _T( "true" )) == 0 ) {
1388         wxLogTrace( traceMask, _T("(%s) value = TRUE"),  __PRETTY_FUNCTION__ );
1389         AddWarning( wxJSONREADER_CASE, _T( "the \'true\' literal must be lowercase" ));
1390         val = true;
1391         return nextCh;
1392     }
1393     else if ( s == _T("false") ) {
1394         wxLogTrace( traceMask, _T("(%s) value = FALSE"),  __PRETTY_FUNCTION__ );
1395         val = false;
1396         return nextCh;
1397     }
1398     else if ( s.CmpNoCase( _T( "false" )) == 0 ) {
1399         wxLogTrace( traceMask, _T("(%s) value = FALSE"),  __PRETTY_FUNCTION__ );
1400         AddWarning( wxJSONREADER_CASE, _T( "the \'false\' literal must be lowercase" ));
1401         val = false;
1402         return nextCh;
1403     }
1404 
1405 
1406     // try to convert to a number if the token starts with a digit, a plus or a minus
1407     // sign. The function first states what type of conversion are tested:
1408     //    1. first signed integer (not if 'ch' == '+')
1409     //    2. unsigned integer (not if 'ch' == '-')
1410     //    3. finally double
1411     bool tSigned = true, tUnsigned = true, tDouble = true;
1412     switch ( ch )  {
1413         case '0' :
1414         case '1' :
1415         case '2' :
1416         case '3' :
1417         case '4' :
1418         case '5' :
1419         case '6' :
1420         case '7' :
1421         case '8' :
1422         case '9' :
1423             // first try a signed integer, then a unsigned integer, then a double
1424             break;
1425 
1426         case '+' :
1427             // the plus sign forces a unsigned integer
1428             tSigned = false;
1429             break;
1430 
1431         case '-' :
1432             // try signed and double
1433             tUnsigned = false;
1434             break;
1435         default :
1436             AddError( _T( "Literal \'%s\' is incorrect (did you forget quotes?)"), s );
1437             return nextCh;
1438     }
1439 
1440     if ( tSigned )    {
1441     #if defined( wxJSON_64BIT_INT)
1442         r = Strtoll( s, &i64 );
1443         wxLogTrace( traceMask, _T("(%s) convert to wxInt64 result=%d"),
1444                   __PRETTY_FUNCTION__, r );
1445         if ( r )  {
1446             // store the value
1447             val = i64;
1448             return nextCh;
1449         }
1450     #else
1451         r = s.ToLong( &l );
1452         wxLogTrace( traceMask, _T("(%s) convert to int result=%d"),
1453                  __PRETTY_FUNCTION__, r );
1454         if ( r )  {
1455             // store the value
1456             val = (int) l;
1457             return nextCh;
1458         }
1459     #endif
1460     }
1461 
1462     if ( tUnsigned )    {
1463     #if defined( wxJSON_64BIT_INT)
1464         r = Strtoull( s, &ui64 );
1465         wxLogTrace( traceMask, _T("(%s) convert to wxUint64 result=%d"),
1466                               __PRETTY_FUNCTION__, r );
1467         if ( r )  {
1468             // store the value
1469             val = ui64;
1470             return nextCh;
1471         }
1472     #else
1473         r = s.ToULong( &ul );
1474         wxLogTrace( traceMask, _T("(%s) convert to int result=%d"),
1475                          __PRETTY_FUNCTION__, r );
1476         if ( r )  {
1477             // store the value
1478             val = (unsigned int) ul;
1479             return nextCh;
1480         }
1481     #endif
1482     }
1483 
1484     if ( tDouble )    {
1485         r = s.ToDouble( &d );
1486         wxLogTrace( traceMask, _T("(%s) convert to double result=%d"),
1487                  __PRETTY_FUNCTION__, r );
1488         if ( r )  {
1489             // store the value
1490             val = d;
1491             return nextCh;
1492         }
1493     }
1494 
1495 
1496     // the value is not syntactically correct
1497     AddError( _T( "Literal \'%s\' is incorrect (did you forget quotes?)"), s );
1498     return nextCh;
1499   return nextCh;
1500 }
1501 
1502 
1503 //! Read a 4-hex-digit unicode character.
1504 /*!
1505  The function is called by ReadString() when the \b \\u sequence is
1506  encontered; the sequence introduces a control character in the form:
1507  \code
1508      \uXXXX
1509  \endcode
1510  where XXXX is a four-digit hex code..
1511  The function reads four chars from the input UTF8 stream by calling ReadChar()
1512  four times: if EOF is encontered before reading four chars, -1 is
1513  also returned and no sequence interpretation is performed.
1514  The function stores the 4 hexadecimal digits in the \c uesBuffer parameter.
1515 
1516  Returns the character after the hex sequence or -1 if EOF.
1517 
1518  \b NOTICE: although the JSON syntax states that only control characters
1519  are represented in this way, the wxJSON library reads and recognizes all
1520  unicode characters in the BMP.
1521 */
1522 int
ReadUES(wxInputStream & is,char * uesBuffer)1523 wxJSONReader::ReadUES( wxInputStream& is, char* uesBuffer )
1524 {
1525     int ch;
1526     for ( int i = 0; i < 4; i++ )  {
1527         ch = ReadChar( is );
1528         if ( ch < 0 )  {
1529             return ch;
1530         }
1531         uesBuffer[i] = (unsigned char) ch;
1532     }
1533     uesBuffer[4] = 0;    // makes a ASCIIZ string
1534 
1535     return 0;
1536 }
1537 
1538 
1539 //! The function appends a Unice Escaped Sequence to the temporary UTF8 buffer
1540 /*!
1541  This function is called by \c ReadString() when a \e unicode \e escaped
1542  \e sequence is read from the input text as for example:
1543 
1544  \code
1545   \u0001
1546  \endcode
1547 
1548  which represents a control character.
1549  The \c uesBuffer parameter contains the 4 hexadecimal digits that are
1550  read from \c ReadUES.
1551 
1552  The function tries to convert the 4 hex digits in a \b wchar_t character
1553  which is appended to the memory buffer \c utf8Buff after converting it
1554  to UTF-8.
1555 
1556  If the conversion from hexadecimal fails, the function does not
1557  store the character in the UTF-8 buffer and an error is reported.
1558  The function is the same in ANSI and Unicode.
1559  Returns -1 if the buffer does not contain valid hex digits.
1560  sequence. On success returns ZERO.
1561 
1562  @param utf8Buff    the UTF-8 buffer to which the control char is written
1563  @param uesBuffer    the four-hex-digits read from the input text
1564  @return ZERO on success, -1 if the four-hex-digit buffer cannot be converted
1565 */
1566 int
AppendUES(wxMemoryBuffer & utf8Buff,const char * uesBuffer)1567 wxJSONReader::AppendUES( wxMemoryBuffer& utf8Buff, const char* uesBuffer )
1568 {
1569     unsigned long l;
1570     int r = sscanf( uesBuffer, "%lx", &l );    // r is the assigned items
1571     if ( r != 1  )  {
1572         AddError( _T( "Invalid Unicode Escaped Sequence"));
1573         return -1;
1574     }
1575     wxLogTrace( traceMask, _T("(%s) unicode sequence=%s code=%ld"),
1576               __PRETTY_FUNCTION__, uesBuffer, l );
1577 
1578     wchar_t ch = (wchar_t) l;
1579     char buffer[16];
1580     size_t len = wxConvUTF8.FromWChar( buffer, 10, &ch, 1 );
1581 
1582     // seems that the wxMBConv classes always appends a NULL byte to
1583     // the converted buffer
1584     if ( len > 1 )    {
1585         len = len - 1;
1586     }
1587     utf8Buff.AppendData( buffer, len );
1588 
1589     // sould never fail
1590     wxASSERT( len != wxCONV_FAILED );
1591     return 0;
1592 }
1593 
1594 //! Store the comment string in the value it refers to.
1595 /*!
 The function searches a suitable value object for storing the
 comment line that was read by the parser and temporarily
 stored in \c m_comment.
1599  The function searches the three values pointed to by:
1600  \li \c m_next
1601  \li \c m_current
1602  \li \c m_lastStored
1603 
1604  The value that the comment refers to is:
1605 
1606  \li if the comment is on the same line as one of the values, the comment
1607     refer to that value and it is stored as \b inline.
1608  \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_BEFORE, the comment lines
1609     are stored in the value pointed to by \c m_next
 \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_AFTER, the comment lines
    are stored in the value pointed to by \c m_current or \c m_lastStored
1612 
 Note that the comment line is only stored if the wxJSONREADER_STORE_COMMENTS
 flag was used when the parser object was constructed; otherwise, the
 function does nothing and immediately returns.
1616  Also note that if the comment line has to be stored but the
1617  function cannot find a suitable value to add the comment line to,
1618  an error is reported (note: not a warning but an error).
1619 */
1620 void
StoreComment(const wxJSONValue * parent)1621 wxJSONReader::StoreComment( const wxJSONValue* parent )
1622 {
1623     wxLogTrace( storeTraceMask, _T("(%s) m_comment=%s"),  __PRETTY_FUNCTION__, m_comment.c_str());
1624     wxLogTrace( storeTraceMask, _T("(%s) m_flags=%d m_commentLine=%d"),
1625               __PRETTY_FUNCTION__, m_flags, m_commentLine );
1626     wxLogTrace( storeTraceMask, _T("(%s) m_current=%p"), __PRETTY_FUNCTION__, m_current );
1627     wxLogTrace( storeTraceMask, _T("(%s) m_next=%p"), __PRETTY_FUNCTION__, m_next );
1628     wxLogTrace( storeTraceMask, _T("(%s) m_lastStored=%p"), __PRETTY_FUNCTION__, m_lastStored );
1629 
1630     // first check if the 'store comment' bit is on
1631     if ( (m_flags & wxJSONREADER_STORE_COMMENTS) == 0 )  {
1632         m_comment.clear();
1633         return;
1634     }
1635 
1636     // check if the comment is on the same line of one of the
1637     // 'current', 'next' or 'lastStored' value
1638     if ( m_current != 0 )  {
1639         wxLogTrace( storeTraceMask, _T("(%s) m_current->lineNo=%d"),
1640              __PRETTY_FUNCTION__, m_current->GetLineNo() );
1641         if ( m_current->GetLineNo() == m_commentLine ) {
1642             wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_current\' INLINE"),
1643              __PRETTY_FUNCTION__ );
1644             m_current->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1645             m_comment.clear();
1646             return;
1647         }
1648     }
1649     if ( m_next != 0 )  {
1650         wxLogTrace( storeTraceMask, _T("(%s) m_next->lineNo=%d"),
1651              __PRETTY_FUNCTION__, m_next->GetLineNo() );
1652         if ( m_next->GetLineNo() == m_commentLine ) {
1653             wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_next\' INLINE"),
1654                  __PRETTY_FUNCTION__ );
1655             m_next->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1656             m_comment.clear();
1657             return;
1658         }
1659     }
1660     if ( m_lastStored != 0 )  {
1661         wxLogTrace( storeTraceMask, _T("(%s) m_lastStored->lineNo=%d"),
1662              __PRETTY_FUNCTION__, m_lastStored->GetLineNo() );
1663         if ( m_lastStored->GetLineNo() == m_commentLine ) {
1664             wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_lastStored\' INLINE"),
1665                  __PRETTY_FUNCTION__ );
1666             m_lastStored->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1667             m_comment.clear();
1668             return;
1669         }
1670     }
1671 
1672     // if comment is BEFORE, store the comment in the 'm_next'
1673     // or 'm_current' value
1674     // if comment is AFTER, store the comment in the 'm_lastStored'
1675     // or 'm_current' value
1676 
1677     if ( m_flags & wxJSONREADER_COMMENTS_AFTER )  {  // comment AFTER
1678         if ( m_current )  {
1679             if ( m_current == parent || !m_current->IsValid()) {
1680                 AddError( _T("Cannot find a value for storing the comment (flag AFTER)"));
1681             }
1682             else  {
1683                 wxLogTrace( storeTraceMask, _T("(%s) comment added to m_current (AFTER)"),
1684                      __PRETTY_FUNCTION__ );
1685                 m_current->AddComment( m_comment, wxJSONVALUE_COMMENT_AFTER );
1686             }
1687         }
1688         else if ( m_lastStored )  {
1689             wxLogTrace( storeTraceMask, _T("(%s) comment added to m_lastStored (AFTER)"),
1690                  __PRETTY_FUNCTION__ );
1691             m_lastStored->AddComment( m_comment, wxJSONVALUE_COMMENT_AFTER );
1692         }
1693         else   {
1694             wxLogTrace( storeTraceMask,
1695                 _T("(%s) cannot find a value for storing the AFTER comment"), __PRETTY_FUNCTION__ );
1696             AddError(_T("Cannot find a value for storing the comment (flag AFTER)"));
1697         }
1698     }
1699     else {       // comment BEFORE can only be added to the 'next' value
1700         if ( m_next )  {
1701             wxLogTrace( storeTraceMask, _T("(%s) comment added to m_next (BEFORE)"),
1702                  __PRETTY_FUNCTION__ );
1703             m_next->AddComment( m_comment, wxJSONVALUE_COMMENT_BEFORE );
1704         }
1705         else   {
1706             // cannot find a value for storing the comment
1707             AddError(_T("Cannot find a value for storing the comment (flag BEFORE)"));
1708         }
1709     }
1710     m_comment.clear();
1711 }
1712 
1713 
1714 //! Return the number of bytes that make a character in stream input
1715 /*!
1716  This function returns the number of bytes that represent a unicode
1717  code point in various encoding.
1718  For example, if the input stream is UTF-32 the function returns 4.
1719  Because the only recognized format for streams is UTF-8 the function
1720  just calls UTF8NumBytes() and returns.
1721  The function is, actually, not used at all.
1722 
1723 */
1724 int
NumBytes(char ch)1725 wxJSONReader::NumBytes( char ch )
1726 {
1727     int n = UTF8NumBytes( ch );
1728     return n;
1729 }
1730 
1731 //! Compute the number of bytes that makes a UTF-8 encoded wide character.
1732 /*!
1733  The function counts the number of '1' bit in the character \c ch and
1734  returns it.
1735  The UTF-8 encoding specifies the number of bytes needed by a wide character
1736  by coding it in the first byte. See below.
1737 
1738  Note that if the character does not contain a valid UTF-8 encoding
1739  the function returns -1.
1740 
1741 \code
1742    UCS-4 range (hex.)    UTF-8 octet sequence (binary)
1743    -------------------   -----------------------------
1744    0000 0000-0000 007F   0xxxxxxx
1745    0000 0080-0000 07FF   110xxxxx 10xxxxxx
1746    0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1747    0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1748    0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1749    0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
1750 \endcode
1751 */
1752 int
UTF8NumBytes(char ch)1753 wxJSONReader::UTF8NumBytes( char ch )
1754 {
1755     int num = 0;    // the counter of '1' bits
1756     for ( int i = 0; i < 8; i++ )  {
1757         if ( (ch & 0x80) == 0 )  {
1758             break;
1759         }
1760         ++num;
1761         ch = ch << 1;
1762     }
1763 
1764     // note that if the char contains more than six '1' bits it is not
1765     // a valid UTF-8 encoded character
1766     if ( num > 6 )  {
1767         num = -1;
1768     }
1769     else if ( num == 0 )  {
1770         num = 1;
1771     }
1772     return num;
1773 }
1774 
1775 //! Convert a UTF-8 memory buffer one char at a time
1776 /*!
1777  This function is used in ANSI mode when input from a stream is in UTF-8
1778  format and the UTF-8 buffer read cannot be converted to the locale
1779  wxString object.
1780  The function performs a char-by-char conversion of the buffer and appends
1781  every representable character to the string \c s.
1782  Characters that cannot be represented are stored as \e unicode \e escaped
1783  \e sequences in the form:
1784  \code
1785    \uXXXX
1786  \endcode
1787  where XXXX is a for-hex-digits Unicode code point.
1788  The function returns the number of characters that cannot be represented
1789  in the current locale.
1790 */
1791 int
ConvertCharByChar(wxString & s,const wxMemoryBuffer & utf8Buffer)1792 wxJSONReader::ConvertCharByChar( wxString& s, const wxMemoryBuffer& utf8Buffer )
1793 {
1794     size_t len  = utf8Buffer.GetDataLen();
1795     char*  buff = (char*) utf8Buffer.GetData();
1796     char* buffEnd = buff + len;
1797 
1798     int result = 0;
1799     char temp[16];    // the UTF-8 code-point
1800 
1801     while ( buff < buffEnd )    {
1802         temp[0] = *buff;    // the first UTF-8 code-unit
1803         // compute the number of code-untis that make one UTF-8 code-point
1804         int numBytes = NumBytes( *buff );
1805         ++buff;
1806         for ( int i = 1; i < numBytes; i++ )    {
1807             if ( buff >= buffEnd )    {
1808                 break;
1809             }
1810             temp[i] = *buff;    // the first UTF-8 code-unit
1811             ++buff;
1812         }
1813         //if ( buff >= buffEnd )    {
1814         //    break;
1815         //}
1816         // now convert 'temp' to a wide-character
1817         wchar_t dst[10];
1818         size_t outLength = wxConvUTF8.ToWChar( dst, 10, temp, numBytes );
1819 
1820         // now convert the wide char to a locale dependent character
1821         // len = wxConvLocal.FromWChar( temp, 16, dst, outLength );
1822         // len = wxConviso8859_1.FromWChar( temp, 16, dst, outLength );
1823         len = wxConvLibc.FromWChar( temp, 16, dst, outLength );
1824         if ( len == wxCONV_FAILED )    {
1825             ++result;
1826             wxString t;
1827             t.Printf( _T( "\\u%04X"), (int) dst[0] );
1828             s.Append( t );
1829         }
1830         else    {
1831             s.Append( temp[0], 1 );
1832         }
1833     }        // end while
1834     return result;
1835 }
1836 
1837 //! Read a memory buffer type
1838 /*!
1839  This function is called by DoRead() when the single-quote character is
1840  encontered which starts a \e memory \e buffer type.
1841  This type is a \b wxJSON extension so the function emits a warning
1842  when such a type encontered.
1843  If the reader is constructed without the \c wxJSONREADER_MEMORYBUFF flag
1844  then the warning becomes an error.
1845  To know more about this JSON syntax extension read \ref wxjson_tutorial_memorybuff
1846 
1847  @param is the input stream
1848  @param val the JSON value that will hold the memory buffer value
1849  @return the last char read or -1 in case of EOF
1850 */
1851 
1852 union byte
1853 {
1854     unsigned char c[2];
1855     short int b;
1856 };
1857 
1858 int
ReadMemoryBuff(wxInputStream & is,wxJSONValue & val)1859 wxJSONReader::ReadMemoryBuff( wxInputStream& is, wxJSONValue& val )
1860 {
1861     static const wxChar* membuffError = _T("the \'memory buffer\' type contains %d invalid digits" );
1862 
1863     AddWarning( wxJSONREADER_MEMORYBUFF, _T( "the \'memory buffer\' type is not valid JSON text" ));
1864 
1865     wxMemoryBuffer buff;
1866     int ch = 0; int errors = 0;
1867     unsigned char byte = 0;
1868     while ( ch >= 0 ) {
1869         ch = ReadChar( is );
1870         if ( ch < 0 )  {
1871             break;
1872         }
1873         if ( ch == '\'' )  {
1874             break;
1875         }
1876         // the conversion is done two chars at a time
1877         unsigned char c1 = (unsigned char) ch;
1878         ch = ReadChar( is );
1879         if ( ch < 0 )  {
1880             break;
1881         }
1882         unsigned char c2 = (unsigned char) ch;
1883         c1 -= '0';
1884         c2 -= '0';
1885         if ( c1 > 9 )  {
1886             c1 -= 7;
1887         }
1888         if ( c2 > 9 )  {
1889             c2 -= 7;
1890         }
1891         if ( c1 > 15 )  {
1892             ++errors;
1893         }
1894         else if ( c2 > 15 )  {
1895             ++errors;
1896         }
1897         else {
1898             byte = (c1 * 16) + c2;
1899             buff.AppendByte( byte );
1900         }
1901     }   // end while
1902 
1903     if ( errors > 0 )  {
1904         wxString err;
1905         err.Printf( membuffError, errors );
1906         AddError( err );
1907     }
1908 
1909 
1910     // now assign the memory buffer object to the JSON-value 'value'
1911     // must check that:
1912     //   'value'  is invalid OR
1913     //   'value'  is a memory buffer; concatenate it
1914     if ( !val.IsValid() )   {
1915         wxLogTrace( traceMask, _T("(%s) assigning the memory buffer to value"), __PRETTY_FUNCTION__ );
1916         val = buff ;
1917     }
1918     else if ( val.IsMemoryBuff() )  {
1919         wxLogTrace( traceMask, _T("(%s) concatenate memory buffer to value"), __PRETTY_FUNCTION__ );
1920         val.Cat( buff );
1921     }
1922     else  {
1923         AddError( _T( "Memory buffer value cannot follow another value") );
1924     }
1925 
1926     // store the input text's line number when the string was stored in 'val'
1927     val.SetLineNo( m_lineNo );
1928 
1929     // read the next char after the closing quotes and returns it
1930     if ( ch >= 0 )  {
1931         ch = ReadChar( is );
1932     }
1933     return ch;
1934 }
1935 
1936 
1937 
1938 
1939 #if defined( wxJSON_64BIT_INT )
1940 //! Converts a decimal string to a 64-bit signed integer
1941 /*!
1942  This function implements a simple variant
1943  of the \b strtoll C-library function.
1944  I needed this implementation because the wxString::To(U)LongLong
1945  function does not work on my system:
1946 
1947   \li GNU/Linux Fedora Core 6
1948   \li GCC version 4.1.1
1949   \li libc.so.6
1950 
1951  The wxWidgets library (actually I have installed version 2.8.7)
1952  relies on \b strtoll in order to do the conversion from a string
1953  to a long long integer but, in fact, it does not work because
1954  the 'wxHAS_STRTOLL' macro is not defined on my system.
1955  The problem only affects the Unicode builds while it seems
1956  that the wxString::To(U)LongLong function works in ANSI builds.
1957 
1958  Note that this implementation is not a complete substitute of the
1959  strtoll function because it only converts decimal strings (only base
1960  10 is implemented).
1961 
1962  @param str the string that contains the decimal literal
1963  @param i64 the pointer to long long which holds the converted value
1964 
1965  @return TRUE if the conversion succeeds
1966 */
1967 bool
Strtoll(const wxString & str,wxInt64 * i64)1968 wxJSONReader::Strtoll( const wxString& str, wxInt64* i64 )
1969 {
1970     wxChar sign = ' ';
1971     wxUint64 ui64 = 0;
1972     bool r = DoStrto_ll( str, &ui64, &sign );
1973 
1974     // check overflow for signed long long
1975     switch ( sign )  {
1976         case '-' :
1977             if ( ui64 > (wxUint64) LLONG_MAX + 1 )  {
1978                 r = false;
1979             }
1980             else  {
1981                 *i64 = (wxInt64) (ui64 * -1);
1982             }
1983             break;
1984 
1985         // case '+' :
1986         default :
1987             if ( ui64 > LLONG_MAX )  {
1988                 r = false;
1989             }
1990             else  {
1991                 *i64 = (wxInt64) ui64;
1992             }
1993             break;
1994     }
1995     return r;
1996 }
1997 
1998 
1999 //! Converts a decimal string to a 64-bit unsigned integer.
2000 /*!
2001  Similar to \c Strtoll but for unsigned integers
2002 */
2003 bool
Strtoull(const wxString & str,wxUint64 * ui64)2004 wxJSONReader::Strtoull( const wxString& str, wxUint64* ui64 )
2005 {
2006     wxChar sign = ' ';
2007     bool r = DoStrto_ll( str, ui64, &sign );
2008     if ( sign == '-' )  {
2009         r = false;
2010     }
2011     return r;
2012 }
2013 
2014 //! Perform the actual conversion from a string to a 64-bit integer
2015 /*!
2016  This function is called internally by the \c Strtoll and \c Strtoull functions
2017  and it does the actual conversion.
2018  The function is also able to check numeric overflow.
2019 
2020  @param str the string that has to be converted
2021  @param ui64 the pointer to a unsigned long long that holds the converted value
2022  @param sign the pointer to a wxChar character that will get the sign of the literal string, if any
2023  @return TRUE if the conversion succeeds
2024 */
2025 bool
DoStrto_ll(const wxString & str,wxUint64 * ui64,wxChar * sign)2026 wxJSONReader::DoStrto_ll( const wxString& str, wxUint64* ui64, wxChar* sign )
2027 {
2028   // the conversion is done by multiplying the individual digits
2029   // in reverse order to the corresponding power of 10
2030   //
2031   //  10's power:  987654321.9876543210
2032   //
2033   // LLONG_MAX:     9223372036854775807
2034   // LLONG_MIN:    -9223372036854775808
2035   // ULLONG_MAX:   18446744073709551615
2036   //
2037   // the function does not take into account the sign: only a
2038   // unsigned long long int is returned
2039 
2040     int maxDigits = 20;       // 20 + 1 (for the sign)
2041 
2042     wxUint64 power10[] = {
2043     wxULL(1),
2044     wxULL(10),
2045     wxULL(100),
2046     wxULL(1000),
2047     wxULL(10000),
2048     wxULL(100000),
2049     wxULL(1000000),
2050     wxULL(10000000),
2051     wxULL(100000000),
2052     wxULL(1000000000),
2053     wxULL(10000000000),
2054     wxULL(100000000000),
2055     wxULL(1000000000000),
2056     wxULL(10000000000000),
2057     wxULL(100000000000000),
2058     wxULL(1000000000000000),
2059     wxULL(10000000000000000),
2060     wxULL(100000000000000000),
2061     wxULL(1000000000000000000),
2062     wxULL(10000000000000000000)
2063   };
2064 
2065 
2066     wxUint64 temp1 = wxULL(0);   // the temporary converted integer
2067 
2068     int strLen = str.length();
2069     if ( strLen == 0 )  {
2070         // an empty string is converted to a ZERO value: the function succeeds
2071         *ui64 = wxLL(0);
2072         return true;
2073     }
2074 
2075     int index = 0;
2076     wxChar ch = str[0];
2077     if ( ch == '+' || ch == '-' )  {
2078         *sign = ch;
2079         ++index;
2080         ++maxDigits;
2081     }
2082 
2083     if ( strLen > maxDigits )  {
2084         return false;
2085     }
2086 
2087     // check the overflow: check the string length and the individual digits
2088     // of the string; the overflow is checked for unsigned long long
2089     if ( strLen == maxDigits )  {
2090         wxString uLongMax( _T("18446744073709551615"));
2091         int j = 0;
2092         for ( int i = index; i < strLen - 1; i++ )  {
2093             ch = str[i];
2094             if ( ch < '0' || ch > '9' ) {
2095                 return false;
2096             }
2097             if ( ch > uLongMax[j] ) {
2098                 return false;
2099             }
2100             if ( ch < uLongMax[j] ) {
2101                 break;
2102             }
2103             ++j;
2104         }
2105     }
2106 
2107     // get the digits in the reverse order and multiply them by the
2108     // corresponding power of 10
2109     int exponent = 0;
2110     for ( int i = strLen - 1; i >= index; i-- )   {
2111         wxChar ch = str[i];
2112         if ( ch < '0' || ch > '9' ) {
2113             return false;
2114         }
2115         ch = ch - '0';
2116         // compute the new temporary value
2117         temp1 += ch * power10[exponent];
2118         ++exponent;
2119     }
2120     *ui64 = temp1;
2121     return true;
2122 }
2123 
2124 #endif       // defined( wxJSON_64BIT_INT )
2125 
2126 /*
2127 {
2128 }
2129 */
2130 
2131 
2132 
2133