1 /////////////////////////////////////////////////////////////////////////////
2 // Name:        jsonreader.cpp
3 // Purpose:     the wxJSONReader class: a JSON text parser
4 // Author:      Luciano Cattani
5 // Created:     2007/10/14
6 // RCS-ID:      $Id: jsonreader.cpp,v 1.12 2008/03/12 10:48:19 luccat Exp $
7 // Copyright:   (c) 2007 Luciano Cattani
8 // Licence:     wxWidgets licence
9 /////////////////////////////////////////////////////////////////////////////
10 
11 #ifdef NDEBUG
12 // make wxLogTrace a noop if no debug set, it's really slow
13 // must be defined before including debug.h
14 #define wxDEBUG_LEVEL 0
15 #endif
16 
17 #include <wx/jsonreader.h>
18 
19 #include <wx/mstream.h>
20 #include <wx/sstream.h>
21 #include <wx/debug.h>
22 #include <wx/log.h>
23 
24 
25 
26 /*! \class wxJSONReader
27  \brief The JSON parser
28 
29  The class is a JSON parser which reads a JSON formatted text and stores
30  values in the \c wxJSONValue structure.
31  The ctor accepts two parameters: the \e style flag, which controls how
32  much error-tolerant should the parser be and an integer which is
33  the maximum number of errors and warnings that have to be reported
34  (the default is 30).
35 
36  If the JSON text document does not contain an open/close JSON character the
37  function returns an \b invalid value object; in other words, the
38  wxJSONValue::IsValid() function returns FALSE.
39  This is the case of a document that is empty or contains only
40  whitespaces or comments.
 If the document contains a starting object/array character immediately
 followed by a closing object/array character
 (i.e.: \c {} ) then the function returns an \b empty array or object
 JSON value.
45  This is a valid JSON object of type wxJSONTYPE_OBJECT or wxJSONTYPE_ARRAY
46  whose wxJSONValue::Size() function returns ZERO.
47 
48  \par JSON text
49 
 The wxJSON parser just skips all characters read from the
 input JSON text until the start-object '{' or start-array '[' characters
 are encountered (see the GetStart() function).
 This means that the JSON input text may contain anything
 before the first start-object/array character except these two chars themselves
 unless they are included in a C/C++ comment.
 Comment lines that appear before the first start array/object character
 are not ignored if the parser is constructed with the wxJSONREADER_STORE_COMMENTS
 flag: they are added to the comment's array of the root JSON value.
59 
60  Note that the parsing process stops when the internal DoRead() function
61  returns. Because that function is recursive, the top-level close-object
62  '}' or close-array ']' character cause the top-level DoRead() function
63  to return thus stopping the parsing process regardless the EOF condition.
64  This means that the JSON input text may contain anything \b after
65  the top-level close-object/array character.
66  Here are some examples:
67 
68  Returns a wxJSONTYPE_INVALID value (invalid JSON value)
69  \code
70    // this text does not contain an open array/object character
71  \endcode
72 
73  Returns a wxJSONTYPE_OBJECT value of Size() = 0
74  \code
75    {
76    }
77  \endcode
78 
79  Returns a wxJSONTYPE_ARRAY value of Size() = 0
80  \code
81    [
82    ]
83  \endcode
84 
85  Text before and after the top-level open/close characters is ignored.
86  \code
87    This non-JSON text does not cause the parser to report errors or warnings
88    {
89    }
90    This non-JSON text does not cause the parser to report errors or warnings
91  \endcode
92 
93 
94  \par Extensions
95 
96  The wxJSON parser recognizes all JSON text plus some extensions
97  that are not part of the JSON syntax but that many other JSON
98  implementations do recognize.
99  If the input text contains the following non-JSON text, the parser
100  reports the situation as \e warnings and not as \e errors unless
101  the parser object was constructed with the wxJSONREADER_STRICT
102  flag. In the latter case the wxJSON parser is not tolerant.
103 
104  \li C/C++ comments: the parser recognizes C and C++ comments.
105     Comments can optionally be stored in the value they refer
106     to and can also be written back to the JSON text document.
107     To know more about comment storage see \ref wxjson_comments
108 
109  \li case tolerance: JSON syntax states that the literals \c null,
110     \c true and \c false must be lowercase; the wxJSON parser
111     also recognizes mixed case literals such as, for example,
112     \b Null or \b FaLSe.  A \e warning is emitted.
113 
 \li wrong or missing closing character: wxJSON parser is tolerant
    about the object / array closing character. When an open-array
    character '[' is encountered, the parser expects the
    corresponding close-array character ']'. If the character
    encountered is a close-object char '}' a warning is reported.
    A warning is also reported if the character is missing when
    the end-of-file is reached.
121 
 \li multi-line strings: this feature allows a JSON string type to be
    split in two or more lines as in the standard C/C++
    languages. The drawback is that this feature is error-prone
    and you have to use it with care.
126     For more info about this topic read \ref wxjson_tutorial_style_split
127 
128  Note that you can control how much error-tolerant should the parser be
129  and also you can specify how many and what extensions are recognized.
130  See the constructor's parameters for more details.
131 
132  \par Unicode vs ANSI
133 
134  The parser can read JSON text from two very different kind of objects:
135 
136  \li a string object (\b wxString)
137  \li a stream object (\b wxInputStream)
138 
139  When the input is from a string object, the character represented in the
140  string is platform- and mode- dependant; in other words, characters are
141  represented differently: in ANSI builds they depend on the charset in use
142  and in Unicode builds they depend on the platform (UCS-2 on win32, UCS-4
143  or UTF-8 on GNU/Linux).
144 
145  When the input is from a stream object, the only recognized encoding format
146  is UTF-8 for both ANSI and Unicode builds.
147 
148  \par Example:
149 
150  \code
151   wxJSONValue  value;
152   wxJSONReader reader;
153 
154   // open a text file that contains the UTF-8 encoded JSON text
155   wxFFileInputStream jsonText( _T("filename.utf8"), _T("r"));
156 
157   // read the file
158   int numErrors = reader.Parse( jsonText, &value );
159 
160   if ( numErrors > 0 )  {
161     ::MessageBox( _T("Error reading the input file"));
162   }
163  \endcode
164 
165  Starting from version 1.1.0 the wxJSON reader and the writer has changed in
166  their internal organization.
167  To know more about ANSI and Unicode mode read \ref wxjson_tutorial_unicode.
168 */
169 
170 
171 
172 // if you have the debug build of wxWidgets and wxJSON you can see
173 // trace messages by setting the:
174 // WXTRACE=traceReader StoreComment
175 // environment variable
#if wxDEBUG_LEVEL > 0
// trace mask passed to the wxLogTrace() calls of the reader
static const wxChar* traceMask = _T("traceReader");
// second trace mask; presumably used by the comment-storage code
// (not referenced in the part of the file reviewed here - TODO confirm)
static const wxChar* storeTraceMask = _T("StoreComment");
#endif
180 
181 //! Ctor
182 /*!
183  Construct a JSON parser object with the given parameters.
184 
185  JSON parser objects should always be constructed on the stack but
186  it does not hurt to have a global JSON parser.
187 
 \param flags this parameter controls how error-tolerant the
        parser should be
190 
191  \param maxErrors the maximum number of errors (and warnings, too) that are
192     reported by the parser. When the number of errors reaches this limit,
193     the parser stops to read the JSON input text and no other error is
194     reported.
195 
 The \c flag parameter is the combination of ZERO or more of the
 following constants OR'ed together:
198 
199  \li wxJSONREADER_ALLOW_COMMENTS: C/C++ comments are recognized by the
200      parser; a warning is reported by the parser
201  \li wxJSONREADER_STORE_COMMENTS: C/C++ comments, if recognized, are
202      stored in the value they refer to and can be rewritten back to
203      the JSON text
204  \li wxJSONREADER_CASE: the parser recognizes mixed-case literal strings
205  \li wxJSONREADER_MISSING: the parser allows missing or wrong close-object
206      and close-array characters
 \li wxJSONREADER_MULTISTRING: strings may be split in two or more
     lines
 \li wxJSONREADER_COMMENTS_AFTER: if STORE_COMMENTS is defined, the parser
     assumes that comment lines appear \b before the value they
     refer to unless this constant is specified. In the latter case,
     comments appear \b after the value they refer to.
213  \li wxJSONREADER_NOUTF8_STREAM: suppress UTF-8 conversion when reading a
214          string value from a stream: the reader assumes that the input stream
215          is encoded in ANSI format and not in UTF-8; only meaningfull in ANSI
216          builds, this flag is simply ignored in Unicode builds.
217 
218  You can also use the following shortcuts to specify some predefined
219  flag's combinations:
220 
221   \li wxJSONREADER_STRICT: all wxJSON extensions are reported as errors, this
222       is the same as specifying a ZERO value as \c flags.
223   \li wxJSONREADER_TOLERANT: this is the same as ALLOW_COMMENTS | CASE |
224       MISSING | MULTISTRING; all wxJSON extensions are turned on but comments
225       are not stored in the value objects.
226 
227  \par Example:
228 
229  The following code fragment construct a JSON parser, turns on all
230  wxJSON extensions and also stores C/C++ comments in the value object
231  they refer to. The parser assumes that the comments apear before the
232  value:
233 
234  \code
235    wxJSONReader reader( wxJSONREADER_TOLERANT | wxJSONREADER_STORE_COMMENTS );
236    wxJSONValue  root;
237    int numErrors = reader.Parse( jsonText, &root );
238  \endcode
239 */
wxJSONReader(int flags,int maxErrors)240 wxJSONReader::wxJSONReader( int flags, int maxErrors )
241 {
242     m_flags     = flags;
243     m_maxErrors = maxErrors;
244     m_noUtf8    = false;
245 #if !defined( wxJSON_USE_UNICODE )
246     // in ANSI builds we can suppress UTF-8 conversion for both the writer and the reader
247     if ( m_flags & wxJSONREADER_NOUTF8_STREAM )    {
248         m_noUtf8 = true;
249     }
250 #endif
251 
252 }
253 
254 //! Dtor - does nothing
~wxJSONReader()255 wxJSONReader::~wxJSONReader()
256 {
257 }
258 
259 //! Parse the JSON document.
260 /*!
261  The two overloaded versions of the \c Parse() function read a
262  JSON text stored in a wxString object or in a wxInputStream
263  object.
264 
265  If \c val is a NULL pointer, the function does not store the
266  values: it can be used as a JSON checker in order to check the
267  syntax of the document.
268  Returns the number of \b errors found in the document.
269  If the returned value is ZERO and the parser was constructed
270  with the \c wxJSONREADER_STRICT flag, then the parsed document
271  is \e well-formed and it only contains valid JSON text.
272 
273  If the \c wxJSONREADER_TOLERANT flag was used in the parser's
274  constructor, then a return value of ZERO
275  does not mean that the document is \e well-formed because it may
276  contain comments and other extensions that are not fatal for the
277  wxJSON parser but other parsers may fail to recognize.
278  You can use the \c GetWarningCount() function to know how many
279  wxJSON extensions are present in the JSON input text.
280 
281  Note that the JSON value object \c val is not cleared by this
282  function unless its type is of the wrong type.
283  In other words, if \c val is of type wxJSONTYPE_ARRAY and it already
284  contains 10 elements and the input document starts with a
285  '[' (open-array char) then the elements read from the document are
286  \b appended to the existing ones.
287 
288  On the other hand, if the text document starts with a '{' (open-object) char
289  then this function must change the type of the \c val object to
290  \c wxJSONTYPE_OBJECT and the old content of 10 array elements will be lost.
291 
292  \par Different input types
293 
294  The real parsing process in done using UTF-8 streams. If the input is
295  from a \b wxString object, the Parse function first converts the input string
296  in a temporary \b wxMemoryInputStream which contains the UTF-8 conversion
297  of the string itself.
298  Next, the overloaded Parse function is called.
299 
 @param doc    the JSON text that has to be parsed
 @param val    the wxJSONValue object that contains the parsed text; if NULL the
         parser does not store anything but errors and warnings are reported
 @return the total number of errors encountered
304 */
305 int
Parse(const wxString & doc,wxJSONValue * val)306 wxJSONReader:: Parse( const wxString& doc, wxJSONValue* val )
307 {
308 #if !defined( wxJSON_USE_UNICODE )
309     // in ANSI builds input from a string never use UTF-8 conversion
310     bool noUtf8_bak = m_noUtf8;        // save the current setting
311     m_noUtf8 = true;
312 #endif
313 
314     // convert the string to a UTF-8 / ANSI memory stream and calls overloaded Parse()
315     char* readBuff = 0;
316     wxCharBuffer utf8CB = doc.ToUTF8();        // the UTF-8 buffer
317 #if !defined( wxJSON_USE_UNICODE )
318     wxCharBuffer ansiCB( doc.c_str());        // the ANSI buffer
319     if ( m_noUtf8 )    {
320         readBuff = ansiCB.data();
321     }
322     else    {
323         readBuff = utf8CB.data();
324     }
325 #else
326         readBuff = utf8CB.data();
327 #endif
328 
329     // now construct the temporary memory input stream
330     size_t len = strlen( readBuff );
331     wxMemoryInputStream is( readBuff, len );
332 
333     int numErr = Parse( is, val );
334 #if !defined( wxJSON_USE_UNICODE )
335     m_noUtf8 = noUtf8_bak;
336 #endif
337     return numErr;
338 }
339 
340 //! \overload Parse( const wxString&, wxJSONValue* )
341 int
Parse(wxInputStream & is,wxJSONValue * val)342 wxJSONReader::Parse( wxInputStream& is, wxJSONValue* val )
343 {
344     // if val == 0 the 'temp' JSON value will be passed to DoRead()
345     wxJSONValue temp;
346     m_level    = 0;
347     m_depth    = 0;
348     m_lineNo   = 1;
349     m_colNo    = 1;
350     m_peekChar = -1;
351     m_errors.clear();
352     m_warnings.clear();
353 
354     // if a wxJSONValue is not passed to the Parse function
355     // we set the temparary object created on the stack
356     // I know this will slow down the validation of input
357     if ( val == 0 )  {
358         val = &temp;
359     }
360     wxASSERT( val );
361 
362     // set the wxJSONValue object's pointers for comment storage
363     m_next       = val;
364     m_next->SetLineNo( -1 );
365     m_lastStored = 0;
366     m_current    = 0;
367 
368     int ch = GetStart( is );
369     switch ( ch )  {
370         case '{' :
371         val->SetType( wxJSONTYPE_OBJECT );
372         break;
373     case '[' :
374         val->SetType( wxJSONTYPE_ARRAY );
375         break;
376     default :
377         AddError( _T("Cannot find a start object/array character" ));
378         return m_errors.size();
379         break;
380     }
381 
382     // returning from DoRead() could be for EOF or for
383     // the closing array-object character
384     // if -1 is returned, it is as an error because the lack
385     // of close-object/array characters
386     // note that the missing close-chars error messages are
387     // added by the DoRead() function
388     ch = DoRead( is, *val );
389     return m_errors.size();
390 }
391 
392 
393 //! Returns the start of the document
394 /*!
395  This is the first function called by the Parse() function and it searches
396  the input stream for the starting character of a JSON text and returns it.
397  JSON text start with '{' or '['.
398  If the two starting characters are inside a C/C++ comment, they
399  are ignored.
400  Returns the JSON-text start character or -1 on EOF.
401 
402  @param is    the input stream that contains the JSON text
403  @return -1 on errors or EOF; one of '{' or '['
404 */
405 int
GetStart(wxInputStream & is)406 wxJSONReader::GetStart( wxInputStream& is )
407 {
408     int ch = 0;
409     do  {
410         switch ( ch )  {
411             case 0 :
412                 ch = ReadChar( is );
413                 break;
414             case '{' :
415                 return ch;
416                 break;
417             case '[' :
418                 return ch;
419                 break;
420             case '/' :
421                 ch = SkipComment( is );
422                 StoreComment( 0 );
423                 break;
424             default :
425                 ch = ReadChar( is );
426                 break;
427         }
428     } while ( ch >= 0 );
429     return ch;
430 }
431 
432 //! Return a reference to the error message's array.
433 const wxArrayString&
GetErrors() const434 wxJSONReader::GetErrors() const
435 {
436     return m_errors;
437 }
438 
439 //! Return a reference to the warning message's array.
440 const wxArrayString&
GetWarnings() const441 wxJSONReader::GetWarnings() const
442 {
443     return m_warnings;
444 }
445 
446 //! Return the depth of the JSON input text
447 /*!
448  The function returns the number of times the recursive \c DoRead function was
449  called in the parsing process thus returning the maximum depth of the JSON
450  input text.
451 */
452 int
GetDepth() const453 wxJSONReader::GetDepth() const
454 {
455     return m_depth;
456 }
457 
458 
459 
460 //! Return the size of the error message's array.
461 int
GetErrorCount() const462 wxJSONReader::GetErrorCount() const
463 {
464     return m_errors.size();
465 }
466 
467 //! Return the size of the warning message's array.
468 int
GetWarningCount() const469 wxJSONReader::GetWarningCount() const
470 {
471     return m_warnings.size();
472 }
473 
474 
475 //! Read a character from the input JSON document.
476 /*!
477  The function returns the next byte from the UTF-8 stream as an INT.
478  In case of errors or EOF, the function returns -1.
479  The function also updates the \c m_lineNo and \c m_colNo data
480  members and converts all CR+LF sequence in LF.
481 
482  This function only returns one byte UTF-8 (one code unit)
483  at a time and not Unicode code points.
484  The only reason for this function is to process line and column
485  numbers.
486 
487  @param is    the input stream that contains the JSON text
488  @return the next char (one single byte) in the input stream or -1 on error or EOF
489 */
490 int
ReadChar(wxInputStream & is)491 wxJSONReader::ReadChar( wxInputStream& is )
492 {
493     if ( is.Eof())    {
494         return -1;
495     }
496 
497     unsigned char ch = is.GetC();
498     size_t last = is.LastRead();    // returns ZERO if EOF
499     if ( last == 0 )    {
500         return -1;
501     }
502 
503     // the function also converts CR in LF. only LF is returned
504     // in the case of CR+LF
505     int nextChar;
506 
507     if ( ch == '\r' )  {
508         m_colNo = 1;
509         nextChar = PeekChar( is );
510         if ( nextChar == -1 )  {
511             return -1;
512         }
513         else if ( nextChar == '\n' )    {
514             ch = is.GetC();
515         }
516     }
517     if ( ch == '\n' )  {
518         ++m_lineNo;
519         m_colNo = 1;
520     }
521     else  {
522         ++m_colNo;
523     }
524     return (int) ch;
525 }
526 
527 
528 //! Peek a character from the input JSON document
529 /*!
530  This function just calls the \b Peek() function on the stream
531  and returns it.
532 
533  @param is    the input stream that contains the JSON text
534  @return the next char (one single byte) in the input stream or -1 on error or EOF
535 */
536 int
PeekChar(wxInputStream & is)537 wxJSONReader::PeekChar( wxInputStream& is )
538 {
539     int ch = -1; unsigned char c;
540     if ( !is.Eof())    {
541         c = is.Peek();
542         ch = c;
543     }
544     return ch;
545 }
546 
547 
548 //! Reads the JSON text document (internal use)
549 /*!
 This is a recursive function that is called by \c Parse()
 and by the \c DoRead() function itself when a new object /
 array character is encountered.
 The function returns when an EOF condition is encountered or
 when the corresponding close-object / close-array char is encountered.
555  The function also increments the \c m_level
556  data member when it is entered and decrements it on return.
557  It also sets \c m_depth equal to \c m_level if \c m_depth is
558  less than \c m_level.
559 
560  The function is the heart of the wxJSON parser class but it is
561  also very easy to understand because JSON syntax is very
562  easy.
563 
564  Returns the last close-object/array character read or -1 on EOF.
565 
566  @param is    the input stream that contains the JSON text
567  @param parent the JSON value object that is the parent of all subobjects
568          read by the function until the next close-object/array (for
569          the top-level \c DoRead function \c parent is the root JSON object)
570  @return one of close-array or close-object char or -1 on error or EOF
571 */
572 int
DoRead(wxInputStream & is,wxJSONValue & parent)573 wxJSONReader::DoRead( wxInputStream& is, wxJSONValue& parent )
574 {
575     ++m_level;
576     if ( m_depth < m_level )    {
577         m_depth = m_level;
578     }
579 
580     // 'value' is the wxJSONValue structure that has to be
581     // read. Data read from the JSON text input is stored
582     // in the following object.
583     wxJSONValue value( wxJSONTYPE_INVALID );
584 
585     // sets the pointers to the current, next and last-stored objects
586     // in order to determine the value to which a comment refers to
587     m_next = &value;
588     m_current = &parent;
589     m_current->SetLineNo( m_lineNo );
590     m_lastStored = 0;
591 
592     // the 'key' string is stored from 'value' when a ':' is encontered
593     wxString  key;
594 
595     // the character read: -1=EOF, 0=to be read
596     int ch=0;
597 
598     do {                   // we read until ch < 0
599         switch ( ch )  {
600             case 0 :
601                 ch = ReadChar( is );
602                 break;
603             case ' ' :
604             case '\t' :
605             case '\n' :
606             case '\r' :
607                 ch = SkipWhiteSpace( is );
608                 break;
609             case -1 :   // the EOF
610                 break;
611             case '/' :
612                 ch = SkipComment( is );
613                 StoreComment( &parent );
614                 break;
615 
616             case '{' :
617                 if ( parent.IsObject() ) {
618                     if ( key.empty() )   {
619                         AddError( _T("\'{\' is not allowed here (\'name\' is missing") );
620                     }
621                     if ( value.IsValid() )   {
622                         AddError( _T("\'{\' cannot follow a \'value\'") );
623                           }
624                 }
625                 else if ( parent.IsArray() )  {
626                     if ( value.IsValid() )   {
627                         AddError( _T("\'{\' cannot follow a \'value\' in JSON array") );
628                     }
629                 }
630                 else  {
631                     wxJSON_ASSERT( 0 );       // always fails
632                 }
633 
634                 // the openobject char cause the DoRead() to be called recursively
635                 value.SetType( wxJSONTYPE_OBJECT );
636                 ch = DoRead( is, value );
637                 break;
638 
639             case '}' :
640                 if ( !parent.IsObject() )  {
641                     AddWarning( wxJSONREADER_MISSING,
642                     _T("Trying to close an array using the \'}\' (close-object) char" ));
643                 }
644                 // close-object: store the current value, if any
645                 StoreValue( ch, key, value, parent );
646                 m_current = &parent;
647                 m_next    = 0;
648                 m_current->SetLineNo( m_lineNo );
649                 ch = ReadChar( is );
650                 return ch;
651                 break;
652 
653             case '[' :
654                 if ( parent.IsObject() ) {
655                     if ( key.empty() )   {
656                         AddError( _T("\'[\' is not allowed here (\'name\' is missing") );
657                     }
658                     if ( value.IsValid() )   {
659                         AddError( _T("\'[\' cannot follow a \'value\' text") );
660                     }
661                 }
662                 else if ( parent.IsArray())  {
663                     if ( value.IsValid() )   {
664                         AddError( _T("\'[\' cannot follow a \'value\'") );
665                     }
666                 }
667                 else  {
668                     wxJSON_ASSERT( 0 );       // always fails
669                 }
670                 // open-array cause the DoRead() to be called recursively
671                 value.SetType( wxJSONTYPE_ARRAY );
672                 ch = DoRead( is, value );
673                 break;
674 
675             case ']' :
676                 if ( !parent.IsArray() )  {
677                     // wrong close-array char (should be close-object)
678                     AddWarning( wxJSONREADER_MISSING,
679                     _T("Trying to close an object using the \']\' (close-array) char" ));
680                 }
681                 StoreValue( ch, key, value, parent );
682                 m_current = &parent;
683                 m_next    = 0;
684                 m_current->SetLineNo( m_lineNo );
685                 return 0;   // returning ZERO for reading the next char
686                 break;
687 
688             case ',' :
689                 // store the value, if any
690                 StoreValue( ch, key, value, parent );
691                 key.clear();
692                 ch = ReadChar( is );
693                 break;
694 
695             case '\"' :
696                 ch = ReadString( is, value );     // read a JSON string type
697                 m_current = &value;
698                 m_next    = 0;
699                 break;
700 
701             case '\'' :
702                 ch = ReadMemoryBuff( is, value );  // read a memory buffer type
703                 m_current = &value;
704                 m_next    = 0;
705                 break;
706 
707             case ':' :   // key / value separator
708                 m_current = &value;
709                 m_current->SetLineNo( m_lineNo );
710                 m_next    = 0;
711                 if ( !parent.IsObject() )  {
712                     AddError( _T( "\':\' can only used in object's values" ));
713                 }
714                 else if ( !value.IsString() )  {
715                     AddError( _T( "\':\' follows a value which is not of type \'string\'" ));
716                 }
717                 else if ( !key.empty() )  {
718                     AddError( _T( "\':\' not allowed where a \'name\' string was already available" ));
719                 }
720                 else  {
721                     // the string in 'value' is set as the 'key'
722                     key = value.AsString();
723                     value.SetType( wxJSONTYPE_INVALID );
724                 }
725                 ch = ReadChar( is );
726                 break;
727 
728             default :
729                 // no special char: it is a literal or a number
730                 // errors are checked in the 'ReadValue()' function.
731                 m_current = &value;
732                 m_current->SetLineNo( m_lineNo );
733                 m_next    = 0;
734                 ch = ReadValue( is, ch, value );
735                 break;
736         } // end switch
737     } while ( ch >= 0 );
738 
739     // the DoRead() should return when the close-object/array char is encontered
740     // if we are here, the EOF condition was encontered so one or more close-something
741     // characters are missing
742     if ( parent.IsArray() )  {
743         AddWarning( wxJSONREADER_MISSING, _T("\']\' missing at end of file"));
744     }
745     else if ( parent.IsObject() )  {
746         AddWarning( wxJSONREADER_MISSING, _T("\'}\' missing at end of file"));
747     }
748     else  {
749         wxJSON_ASSERT( 0 );
750     }
751 
752     // we store the value, as there is a missing close-object/array char
753     StoreValue( ch, key, value, parent );
754 
755     --m_level;
756     return ch;
757 }
758 
759 //! Store a value in the parent object.
760 /*!
 The function is called by \c DoRead() when a comma
 or a close-object/array character is encountered and stores the current
 value read by the parser in the parent object.
 The function checks that \c value is not invalid and that \c key is
 not an empty string if \c parent is an object.

 \param ch    the character read: a comma or close object/array char
768  \param key    the \b key string: must be empty if \c parent is an array
769  \param value    the current JSON value to be stored in \c parent
770  \param parent    the JSON value that is the parent of \c value.
771  \return none
772 */
773 void
StoreValue(int ch,const wxString & key,wxJSONValue & value,wxJSONValue & parent)774 wxJSONReader::StoreValue( int ch, const wxString& key, wxJSONValue& value, wxJSONValue& parent )
775 {
776     // if 'ch' == } or ] than value AND key may be empty when a open object/array
777     // is immediatly followed by a close object/array
778     //
779     // if 'ch' == , (comma) value AND key (for TypeMap) cannot be empty
780     //
781     wxLogTrace( traceMask, _T("(%s) ch=%d char=%c"), __PRETTY_FUNCTION__, ch, (char) ch);
782     wxLogTrace( traceMask, _T("(%s) value=%s"), __PRETTY_FUNCTION__, value.AsString().c_str());
783 
784     m_current = 0;
785     m_next    = &value;
786     m_lastStored = 0;
787     m_next->SetLineNo( -1 );
788 
789     if ( !value.IsValid() && key.empty() ) {
790         // OK, if the char read is a close-object or close-array
791         if ( ch == '}' || ch == ']' )  {
792             m_lastStored = 0;
793             wxLogTrace( traceMask, _T("(%s) key and value are empty, returning"),
794                              __PRETTY_FUNCTION__);
795         }
796         else  {
797             AddError( _T("key or value is missing for JSON value"));
798         }
799     }
800     else  {
801         // key or value are not empty
802         if ( parent.IsObject() )  {
803             if ( !value.IsValid() ) {
804                 AddError( _T("cannot store the value: \'value\' is missing for JSON object type"));
805              }
806              else if ( key.empty() ) {
807                 AddError( _T("cannot store the value: \'key\' is missing for JSON object type"));
808             }
809             else  {
810                 // OK, adding the value to parent key/value map
811                 wxLogTrace( traceMask, _T("(%s) adding value to key:%s"),
812                      __PRETTY_FUNCTION__, key.c_str());
813                 parent[key] = value;
814                 m_lastStored = &(parent[key]);
815                 m_lastStored->SetLineNo( m_lineNo );
816             }
817         }
818         else if ( parent.IsArray() ) {
819             if ( !value.IsValid() ) {
820                     AddError( _T("cannot store the item: \'value\' is missing for JSON array type"));
821             }
822             if ( !key.empty() ) {
823                 AddError( _T("cannot store the item: \'key\' (\'%s\') is not permitted in JSON array type"), key);
824             }
825             wxLogTrace( traceMask, _T("(%s) appending value to parent array"),
826                                  __PRETTY_FUNCTION__ );
827             parent.Append( value );
828             const wxJSONInternalArray* arr = parent.AsArray();
829             wxJSON_ASSERT( arr );
830             m_lastStored = &(arr->Last());
831             m_lastStored->SetLineNo( m_lineNo );
832         }
833         else  {
834             wxJSON_ASSERT( 0 );  // should never happen
835         }
836     }
837     value.SetType( wxJSONTYPE_INVALID );
838     value.ClearComments();
839 }
840 
841 //! Add a error message to the error's array
842 /*!
843  The overloaded versions of this function add an error message to the
844  error's array stored in \c m_errors.
845  The error message is formatted as follows:
846 
847  \code
848    Error: line xxx, col xxx - <error_description>
849  \endcode
850 
851  The \c msg parameter is the description of the error; line's and column's
852  number are automatically added by the functions.
853  The \c fmt parameter is a format string that has the same syntax as the \b printf
854  function.
855  Note that it is the user's responsability to provide a format string suitable
856  with the arguments: another string or a character.
857 */
858 void
AddError(const wxString & msg)859 wxJSONReader::AddError( const wxString& msg )
860 {
861     wxString err;
862     err.Printf( _T("Error: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str() );
863 
864     wxLogTrace( traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
865 
866     if ( (int) m_errors.size() < m_maxErrors )  {
867         m_errors.Add( err );
868     }
869     else if ( (int) m_errors.size() == m_maxErrors )  {
870         m_errors.Add( _T("ERROR: too many error messages - ignoring further errors"));
871     }
872     // else if ( m_errors > m_maxErrors ) do nothing, thus ignore the error message
873 }
874 
875 //! \overload AddError( const wxString& )
876 void
AddError(const wxString & fmt,const wxString & str)877 wxJSONReader::AddError( const wxString& fmt, const wxString& str )
878 {
879     wxString s;
880     s.Printf( fmt.c_str(), str.c_str() );
881     AddError( s );
882 }
883 
884 //! \overload AddError( const wxString& )
885 void
AddError(const wxString & fmt,wxChar c)886 wxJSONReader::AddError( const wxString& fmt, wxChar c )
887 {
888     wxString s;
889     s.Printf( fmt.c_str(), c );
890     AddError( s );
891 }
892 
893 //! Add a warning message to the warning's array
894 /*!
895  The warning description is as follows:
896  \code
897    Warning: line xxx, col xxx - <warning_description>
898  \endcode
899 
900  Warning messages are generated by the parser when the JSON
901  text that has been read is not well-formed but the
902  error is not fatal and the parser recognizes the text
903  as an extension to the JSON standard (see the parser's ctor
904  for more info about wxJSON extensions).
905 
906  Note that the parser has to be constructed with a flag that
907  indicates if each individual wxJSON extension is on.
908  If the warning message is related to an extension that is not
909  enabled in the parser's \c m_flag data member, this function
910  calls AddError() and the warning message becomes an error
911  message.
912  The \c type parameter is one of the same constants that
913  specify the parser's extensions.
914  If type is ZERO than the function always adds a warning
915 */
916 void
AddWarning(int type,const wxString & msg)917 wxJSONReader::AddWarning( int type, const wxString& msg )
918 {
919     // if 'type' AND 'm_flags' == 1 than the extension is
920     // ON. Otherwise it is OFF anf the function calls AddError()
921     if ( type != 0 )    {
922         if ( ( type & m_flags ) == 0 )  {
923             AddError( msg );
924             return;
925         }
926     }
927 
928     wxString err;
929     err.Printf( _T( "Warning: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str() );
930 
931     wxLogTrace( traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
932     if ( (int) m_warnings.size() < m_maxErrors )  {
933         m_warnings.Add( err );
934     }
935     else if ( (int) m_warnings.size() == m_maxErrors )  {
936         m_warnings.Add( _T("Error: too many warning messages - ignoring further warnings"));
937     }
938     // else do nothing, thus ignore the warning message
939 }
940 
941 //! Skip all whitespaces.
942 /*!
943  The function reads characters from the input text
944  and returns the first non-whitespace character read or -1
945  if EOF.
946  Note that the function does not rely on the \b isspace function
947  of the C library but checks the space constants: space, TAB and
948  LF.
949 */
950 int
SkipWhiteSpace(wxInputStream & is)951 wxJSONReader::SkipWhiteSpace( wxInputStream& is )
952 {
953     // just read one byte at a time and check for whitespaces
954     int ch;
955     do {
956         ch = ReadChar( is );
957         if ( ch < 0 )  {
958             break;
959         }
960     }
961     while ( ch == ' ' || ch == '\n' || ch == '\t' );
962     wxLogTrace( traceMask, _T("(%s) end whitespaces line=%d col=%d"),
963              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
964     return ch;
965 }
966 
967 //! Skip a comment
968 /*!
969  The function is called by DoRead() when a '/' (slash) character
970  is read from the input stream assuming that a C/C++ comment is starting.
971  Returns the first character that follows the comment or
972  -1 on EOF.
973  The function also adds a warning message because comments are not
974  valid JSON text.
975  The function also stores the comment, if any, in the \c m_comment data
976  member: it can be used by the DoRead() function if comments have to be
977  stored in the value they refer to.
978 */
979 int
SkipComment(wxInputStream & is)980 wxJSONReader::SkipComment( wxInputStream& is )
981 {
982     static const wxChar* warn =
983     _T("Comments may be tolerated in JSON text but they are not part of JSON syntax");
984 
985     // if it is a comment, then a warning is added to the array
986     // otherwise it is an error: values cannot start with a '/'
987     // read the char next to the first slash
988     int ch = ReadChar( is );
989     if ( ch < 0 )  {
990         return -1;
991     }
992 
993     wxLogTrace( storeTraceMask, _T("(%s) start comment line=%d col=%d"),
994              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
995 
996     // the temporary UTF-8/ANSI buffer that holds the comment string. This will be
997     // converted to a wxString object using wxString::FromUTF8() or From8BitData()
998     wxMemoryBuffer utf8Buff;
999     unsigned char c;
1000 
1001     if ( ch == '/' )  {         // C++ comment, read until end-of-line
1002         // C++ comment strings are in UTF-8 format. we store all
1003         // UTF-8 code units until the first LF or CR+LF
1004         AddWarning( wxJSONREADER_ALLOW_COMMENTS, warn );
1005         m_commentLine = m_lineNo;
1006         utf8Buff.AppendData( "//", 2 );
1007 
1008         while ( ch >= 0 )  {
1009             if ( ch == '\n' )    {
1010                 break;
1011             }
1012             if ( ch == '\r' )    {
1013                 ch = PeekChar( is );
1014                 if ( ch == '\n' )    {
1015                     ch = ReadChar( is );
1016                 }
1017                 break;
1018             }
1019             else    {
1020                 // store the char in the UTF8 temporary buffer
1021                 c = (unsigned char) ch;
1022                 utf8Buff.AppendByte( c );
1023             }
1024             ch = ReadChar( is );
1025         }
1026         // now convert the temporary UTF-8 buffer
1027         m_comment = wxString::FromUTF8( (const char*) utf8Buff.GetData(),
1028                         utf8Buff.GetDataLen());
1029     }
1030 
1031     // check if a C-style comment
1032     else if ( ch == '*' )  {     // C-style comment
1033         AddWarning(wxJSONREADER_ALLOW_COMMENTS, warn );
1034         m_commentLine = m_lineNo;
1035         utf8Buff.AppendData( "/*", 2 );
1036         while ( ch >= 0 ) {
1037             // check the END-COMMENT chars ('*/')
1038             if ( ch == '*' )    {
1039                 ch = PeekChar( is );
1040                 if ( ch == '/' )    {
1041                     ch = ReadChar( is );  // read the '/' char
1042                     ch = ReadChar( is );  // read the next char that will be returned
1043                     utf8Buff.AppendData( "*/", 2 );
1044                     break;
1045                 }
1046             }
1047             // store the char in the UTF8 temporary buffer
1048             c = (unsigned char) ch;
1049             utf8Buff.AppendByte( c );
1050             ch = ReadChar( is );
1051         }
1052         // now convert the temporary buffer in a wxString object
1053         if ( m_noUtf8 )    {
1054             m_comment = wxString::From8BitData( (const char*) utf8Buff.GetData(),
1055                                 utf8Buff.GetDataLen());
1056         }
1057         else    {
1058             m_comment = wxString::FromUTF8( (const char*) utf8Buff.GetData(),
1059                                 utf8Buff.GetDataLen());
1060         }
1061     }
1062 
1063     else  {   // it is not a comment, return the character next the first '/'
1064         AddError( _T( "Strange '/' (did you want to insert a comment?)"));
1065         // we read until end-of-line OR end of C-style comment OR EOF
1066         // because a '/' should be a start comment
1067         while ( ch >= 0 ) {
1068             ch = ReadChar( is );
1069             if ( ch == '*' && PeekChar( is ) == '/' )  {
1070                 break;
1071             }
1072             if ( ch == '\n' )  {
1073                 break;
1074             }
1075         }
1076         // read the next char that will be returned
1077         ch = ReadChar( is );
1078     }
1079     wxLogTrace( traceMask, _T("(%s) end comment line=%d col=%d"),
1080              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1081     wxLogTrace( storeTraceMask, _T("(%s) end comment line=%d col=%d"),
1082              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1083     wxLogTrace( storeTraceMask, _T("(%s) comment=%s"),
1084              __PRETTY_FUNCTION__, m_comment.c_str());
1085     return ch;
1086 }
1087 
1088 //! Read a string value
1089 /*!
1090  The function reads a string value from input stream and it is
1091  called by the \c DoRead() function when it enconters the
1092  double quote characters.
1093  The function read all bytes up to the next double quotes
1094  (unless it is escaped) and stores them in a temporary UTF-8
1095  memory buffer.
1096  Also, the function processes the escaped characters defined
1097  in the JSON syntax.
1098 
1099  Next, the function tries to convert the UTF-8 buffer to a
1100  \b wxString object using the \b wxString::FromUTF8 function.
1101  Depending on the build mode, we can have the following:
1102  \li in Unicode the function always succeeds, provided that the
1103     buffer contains valid UTF-8 code units.
1104 
1105  \li in ANSI builds the conversion may fail because of the presence of
1106     unrepresentable characters in the current locale. In this case,
1107     the default behaviour is to perform a char-by-char conversion; every
1108     char that cannot be represented in the current locale is stored as
1109     \e unicode \e escaped \e sequence
1110 
1111  \li in ANSI builds, if the reader is constructed with the wxJSONREADER_NOUTF8_STREAM
1112      then no conversion takes place and the UTF-8 temporary buffer is simply
1113      \b copied to the \b wxString object
1114 
1115  The string is, finally, stored in the provided wxJSONValue argument
1116  provided that it is empty or it contains a string value.
1117  This is because the parser class recognizes multi-line strings
1118  like the following one:
1119  \code
1120    [
1121       "This is a very long string value which is splitted into more"
1122       "than one line because it is more human readable"
1123    ]
1124  \endcode
1125  Because of the lack of the value separator (,) the parser
1126  assumes that the string was splitted into several double-quoted
1127  strings.
1128  If the value does not contain a string then an error is
1129  reported.
1130  Splitted strings cause the parser to report a warning.
1131 */
1132 int
ReadString(wxInputStream & is,wxJSONValue & val)1133 wxJSONReader::ReadString( wxInputStream& is, wxJSONValue& val )
1134 {
1135     // the char last read is the opening qoutes (")
1136 
1137     wxMemoryBuffer utf8Buff;
1138     char ues[8];        // stores a Unicode Escaped Esquence: \uXXXX
1139 
1140     int ch = 0;
1141     while ( ch >= 0 ) {
1142         ch = ReadChar( is );
1143         unsigned char c = (unsigned char) ch;
1144         if ( ch == '\\' )  {    // an escape sequence
1145             ch = ReadChar( is );
1146             switch ( ch )  {
1147                 case -1 :        // EOF
1148                     break;
1149                 case 't' :
1150                     utf8Buff.AppendByte( '\t' );
1151                     break;
1152                 case 'n' :
1153                     utf8Buff.AppendByte( '\n' );
1154                     break;
1155                 case 'b' :
1156                     utf8Buff.AppendByte( '\b' );
1157                     break;
1158                 case 'r' :
1159                     utf8Buff.AppendByte( '\r' );
1160                     break;
1161                 case '\"' :
1162                     utf8Buff.AppendByte( '\"' );
1163                     break;
1164                 case '\\' :
1165                     utf8Buff.AppendByte( '\\' );
1166                     break;
1167                 case '/' :
1168                     utf8Buff.AppendByte( '/' );
1169                     break;
1170                 case 'f' :
1171                     utf8Buff.AppendByte( '\f' );
1172                     break;
1173                 case 'u' :
1174                     ch = ReadUES( is, ues );
1175                     if ( ch < 0 ) {        // if EOF, returns
1176                         return ch;
1177                     }
1178                     // append the escaped character to the UTF8 buffer
1179                     AppendUES( utf8Buff, ues );
1180                     // many thanks to Bryan Ashby who discovered this bug
1181                     continue;
1182                     // break;
1183                 default :
1184                     AddError( _T( "Unknow escaped character \'\\%c\'"), ch );
1185             }
1186         }
1187         else {
1188             // we have read a non-escaped character so we have to append it to
1189             // the temporary UTF-8 buffer until the next quote char
1190             if ( ch == '\"' )    {
1191                 break;
1192             }
1193             utf8Buff.AppendByte( c );
1194         }
1195     }
1196 
1197     // if UTF-8 conversion is disabled (ANSI builds only) we just copy the
1198     // bit data to a wxString object
1199     wxString s;
1200     if ( m_noUtf8 )    {
1201         s = wxString::From8BitData( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1202     }
1203     else    {
1204         // perform UTF-8 conversion
1205         // first we check that the UTF-8 buffer is correct, i.e. it contains valid
1206         // UTF-8 code points.
1207         // this works in both ANSI and Unicode builds.
1208         size_t convLen = wxConvUTF8.ToWChar( 0,        // wchar_t destination
1209                         0,                            // size_t  destLenght
1210             (const char*) utf8Buff.GetData(),        // char_t  source
1211                 utf8Buff.GetDataLen());                // size_t  sourceLenght
1212 
1213         if ( convLen == wxCONV_FAILED )    {
1214             AddError( _T( "String value: the UTF-8 stream is invalid"));
1215             s.append( _T( "<UTF-8 stream not valid>"));
1216         }
1217         else    {
1218 #if defined( wxJSON_USE_UNICODE )
1219             // in Unicode just convert to wxString
1220             s = wxString::FromUTF8( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1221 #else
1222             // in ANSI, the conversion may fail and an empty string is returned
1223             // in this case, the reader do a char-by-char conversion storing
1224               // unicode escaped sequences of unrepresentable characters
1225             s = wxString::FromUTF8( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1226             if ( s.IsEmpty() )    {
1227                 int r = ConvertCharByChar( s, utf8Buff );    // return number of escaped sequences
1228                 if ( r > 0 )    {
1229                     AddWarning( 0, _T( "The string value contains unrepresentable Unicode characters"));
1230                 }
1231             }
1232 #endif
1233         }
1234      }
1235     wxLogTrace( traceMask, _T("(%s) line=%d col=%d"),
1236              __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1237     wxLogTrace( traceMask, _T("(%s) string read=%s"),
1238              __PRETTY_FUNCTION__, s.c_str() );
1239     wxLogTrace( traceMask, _T("(%s) value=%s"),
1240              __PRETTY_FUNCTION__, val.AsString().c_str() );
1241 
1242     // now assign the string to the JSON-value 'value'
1243     // must check that:
1244     //   'value'  is empty
1245     //   'value'  is a string; concatenate it but emit warning
1246     if ( !val.IsValid() )   {
1247         wxLogTrace( traceMask, _T("(%s) assigning the string to value"), __PRETTY_FUNCTION__ );
1248         val = s ;
1249     }
1250     else if ( val.IsString() )  {
1251         AddWarning( wxJSONREADER_MULTISTRING,
1252             _T("Multiline strings are not allowed by JSON syntax") );
1253         wxLogTrace( traceMask, _T("(%s) concatenate the string to value"), __PRETTY_FUNCTION__ );
1254         val.Cat( s );
1255     }
1256     else  {
1257         AddError( _T( "String value \'%s\' cannot follow another value"), s );
1258     }
1259 
1260     // store the input text's line number when the string was stored in 'val'
1261     val.SetLineNo( m_lineNo );
1262 
1263     // read the next char after the closing quotes and returns it
1264     if ( ch >= 0 )  {
1265         ch = ReadChar( is );
1266     }
1267     return ch;
1268 }
1269 
1270 //! Reads a token string
1271 /*!
1272  This function is called by the ReadValue() when the
1273  first character encontered is not a special char
1274  and it is not a double-quote.
1275  The only possible type is a literal or a number which
1276  all lies in the US-ASCII charset so their UTF-8 encodeing
1277  is the same as US-ASCII.
1278  The function simply reads one byte at a time from the stream
1279  and appends them to a \b wxString object.
1280  Returns the next character read.
1281 
1282  A token cannot include \e unicode \e escaped \e sequences
1283  so this function does not try to interpret such sequences.
1284 
1285  @param is    the input stream
1286  @param ch    the character read by DoRead
1287  @param s    the string object that contains the token read
1288  @return -1 in case of errors or EOF
1289 */
1290 int
ReadToken(wxInputStream & is,int ch,wxString & s)1291 wxJSONReader::ReadToken( wxInputStream& is, int ch, wxString& s )
1292 {
1293     int nextCh = ch;
1294     while ( nextCh >= 0 ) {
1295         switch ( nextCh ) {
1296             case ' ' :
1297             case ',' :
1298             case ':' :
1299             case '[' :
1300             case ']' :
1301             case '{' :
1302             case '}' :
1303             case '\t' :
1304             case '\n' :
1305             case '\r' :
1306             case '\b' :
1307                 wxLogTrace( traceMask, _T("(%s) line=%d col=%d"),
1308                      __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1309                 wxLogTrace( traceMask, _T("(%s) token read=%s"),
1310                      __PRETTY_FUNCTION__, s.c_str() );
1311                 return nextCh;
1312                 break;
1313             default :
1314                 s.Append( (unsigned char) nextCh, 1 );
1315                 break;
1316         }
1317         // read the next character
1318         nextCh = ReadChar( is );
1319     }
1320     wxLogTrace( traceMask, _T("(%s) EOF on line=%d col=%d"),
1321          __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1322     wxLogTrace( traceMask, _T("(%s) EOF - token read=%s"),
1323              __PRETTY_FUNCTION__, s.c_str() );
1324     return nextCh;
1325 }
1326 
1327 //! Read a value from input stream
1328 /*!
1329  The function is called by DoRead() when it enconters a char that is
1330  not a special char nor a double-quote.
1331  It assumes that the string is a numeric value or a literal
1332  boolean value and stores it in the wxJSONValue object \c val.
1333 
1334  The function also checks that \c val is of type wxJSONTYPE_INVALID otherwise
1335  an error is reported becasue a value cannot follow another value:
1336  maybe a (,) or (:) is missing.
1337 
1338  If the literal starts with a digit, a plus or minus sign, the function
1339  tries to interpret it as a number. The following are tried by the function,
1340  in this order:
1341 
1342  \li if the literal starts with a digit: signed integer, then unsigned integer
1343         and finally double conversion is tried
1344  \li if the literal starts with a minus sign: signed integer, then  double
1345         conversion is tried
1346  \li if the literal starts with plus sign: unsigned integer
1347         then double conversion is tried
1348 
1349  Returns the next character or -1 on EOF.
1350 */
1351 int
ReadValue(wxInputStream & is,int ch,wxJSONValue & val)1352 wxJSONReader::ReadValue( wxInputStream& is, int ch, wxJSONValue& val )
1353 {
1354     wxString s;
1355     int nextCh = ReadToken( is, ch, s );
1356     wxLogTrace( traceMask, _T("(%s) value=%s"),
1357              __PRETTY_FUNCTION__, val.AsString().c_str() );
1358 
1359     if ( val.IsValid() )  {
1360         AddError( _T( "Value \'%s\' cannot follow a value: \',\' or \':\' missing?"), s );
1361         return nextCh;
1362     }
1363 
1364     // variables used for converting numeric values
1365     bool r;  double d;
1366 #if defined( wxJSON_64BIT_INT )
1367     wxInt64  i64;
1368     wxUint64 ui64;
1369 #else
1370     unsigned long int ul; long int l;
1371 #endif
1372 
1373     // first try the literal strings lowercase and nocase
1374     if ( s == _T("null") ) {
1375         val.SetType( wxJSONTYPE_NULL );
1376         wxLogTrace( traceMask, _T("(%s) value = NULL"),  __PRETTY_FUNCTION__ );
1377         return nextCh;
1378     }
1379     else if ( s.CmpNoCase( _T( "null" )) == 0 ) {
1380         wxLogTrace( traceMask, _T("(%s) value = NULL"),  __PRETTY_FUNCTION__ );
1381         AddWarning( wxJSONREADER_CASE, _T( "the \'null\' literal must be lowercase" ));
1382         val.SetType( wxJSONTYPE_NULL );
1383         return nextCh;
1384     }
1385     else if ( s == _T("true") ) {
1386         wxLogTrace( traceMask, _T("(%s) value = TRUE"),  __PRETTY_FUNCTION__ );
1387         val = true;
1388         return nextCh;
1389     }
1390     else if ( s.CmpNoCase( _T( "true" )) == 0 ) {
1391         wxLogTrace( traceMask, _T("(%s) value = TRUE"),  __PRETTY_FUNCTION__ );
1392         AddWarning( wxJSONREADER_CASE, _T( "the \'true\' literal must be lowercase" ));
1393         val = true;
1394         return nextCh;
1395     }
1396     else if ( s == _T("false") ) {
1397         wxLogTrace( traceMask, _T("(%s) value = FALSE"),  __PRETTY_FUNCTION__ );
1398         val = false;
1399         return nextCh;
1400     }
1401     else if ( s.CmpNoCase( _T( "false" )) == 0 ) {
1402         wxLogTrace( traceMask, _T("(%s) value = FALSE"),  __PRETTY_FUNCTION__ );
1403         AddWarning( wxJSONREADER_CASE, _T( "the \'false\' literal must be lowercase" ));
1404         val = false;
1405         return nextCh;
1406     }
1407 
1408 
1409     // try to convert to a number if the token starts with a digit, a plus or a minus
1410     // sign. The function first states what type of conversion are tested:
1411     //    1. first signed integer (not if 'ch' == '+')
1412     //    2. unsigned integer (not if 'ch' == '-')
1413     //    3. finally double
1414     bool tSigned = true, tUnsigned = true, tDouble = true;
1415     switch ( ch )  {
1416         case '0' :
1417         case '1' :
1418         case '2' :
1419         case '3' :
1420         case '4' :
1421         case '5' :
1422         case '6' :
1423         case '7' :
1424         case '8' :
1425         case '9' :
1426             // first try a signed integer, then a unsigned integer, then a double
1427             break;
1428 
1429         case '+' :
1430             // the plus sign forces a unsigned integer
1431             tSigned = false;
1432             break;
1433 
1434         case '-' :
1435             // try signed and double
1436             tUnsigned = false;
1437             break;
1438         default :
1439             AddError( _T( "Literal \'%s\' is incorrect (did you forget quotes?)"), s );
1440             return nextCh;
1441     }
1442 
1443     if ( tSigned )    {
1444     #if defined( wxJSON_64BIT_INT)
1445         r = Strtoll( s, &i64 );
1446         wxLogTrace( traceMask, _T("(%s) convert to wxInt64 result=%d"),
1447                   __PRETTY_FUNCTION__, r );
1448         if ( r )  {
1449             // store the value
1450             val = i64;
1451             return nextCh;
1452         }
1453     #else
1454         r = s.ToLong( &l );
1455         wxLogTrace( traceMask, _T("(%s) convert to int result=%d"),
1456                  __PRETTY_FUNCTION__, r );
1457         if ( r )  {
1458             // store the value
1459             val = (int) l;
1460             return nextCh;
1461         }
1462     #endif
1463     }
1464 
1465     if ( tUnsigned )    {
1466     #if defined( wxJSON_64BIT_INT)
1467         r = Strtoull( s, &ui64 );
1468         wxLogTrace( traceMask, _T("(%s) convert to wxUint64 result=%d"),
1469                               __PRETTY_FUNCTION__, r );
1470         if ( r )  {
1471             // store the value
1472             val = ui64;
1473             return nextCh;
1474         }
1475     #else
1476         r = s.ToULong( &ul );
1477         wxLogTrace( traceMask, _T("(%s) convert to int result=%d"),
1478                          __PRETTY_FUNCTION__, r );
1479         if ( r )  {
1480             // store the value
1481             val = (unsigned int) ul;
1482             return nextCh;
1483         }
1484     #endif
1485     }
1486 
1487     if ( tDouble )    {
1488         r = s.ToDouble( &d );
1489         wxLogTrace( traceMask, _T("(%s) convert to double result=%d"),
1490                  __PRETTY_FUNCTION__, r );
1491         if ( r )  {
1492             // store the value
1493             val = d;
1494             return nextCh;
1495         }
1496     }
1497 
1498 
1499     // the value is not syntactically correct
1500     AddError( _T( "Literal \'%s\' is incorrect (did you forget quotes?)"), s );
1501     return nextCh;
1502   return nextCh;
1503 }
1504 
1505 
1506 //! Read a 4-hex-digit unicode character.
1507 /*!
1508  The function is called by ReadString() when the \b \\u sequence is
1509  encontered; the sequence introduces a control character in the form:
1510  \code
1511      \uXXXX
1512  \endcode
1513  where XXXX is a four-digit hex code..
1514  The function reads four chars from the input UTF8 stream by calling ReadChar()
1515  four times: if EOF is encontered before reading four chars, -1 is
1516  also returned and no sequence interpretation is performed.
1517  The function stores the 4 hexadecimal digits in the \c uesBuffer parameter.
1518 
1519  Returns the character after the hex sequence or -1 if EOF.
1520 
1521  \b NOTICE: although the JSON syntax states that only control characters
1522  are represented in this way, the wxJSON library reads and recognizes all
1523  unicode characters in the BMP.
1524 */
1525 int
ReadUES(wxInputStream & is,char * uesBuffer)1526 wxJSONReader::ReadUES( wxInputStream& is, char* uesBuffer )
1527 {
1528     int ch;
1529     for ( int i = 0; i < 4; i++ )  {
1530         ch = ReadChar( is );
1531         if ( ch < 0 )  {
1532             return ch;
1533         }
1534         uesBuffer[i] = (unsigned char) ch;
1535     }
1536     uesBuffer[4] = 0;    // makes a ASCIIZ string
1537 
1538     return 0;
1539 }
1540 
1541 
1542 //! The function appends a Unice Escaped Sequence to the temporary UTF8 buffer
1543 /*!
1544  This function is called by \c ReadString() when a \e unicode \e escaped
1545  \e sequence is read from the input text as for example:
1546 
1547  \code
1548   \u0001
1549  \endcode
1550 
1551  which represents a control character.
1552  The \c uesBuffer parameter contains the 4 hexadecimal digits that are
1553  read from \c ReadUES.
1554 
1555  The function tries to convert the 4 hex digits in a \b wchar_t character
1556  which is appended to the memory buffer \c utf8Buff after converting it
1557  to UTF-8.
1558 
1559  If the conversion from hexadecimal fails, the function does not
1560  store the character in the UTF-8 buffer and an error is reported.
1561  The function is the same in ANSI and Unicode.
1562  Returns -1 if the buffer does not contain valid hex digits.
1563  sequence. On success returns ZERO.
1564 
1565  @param utf8Buff    the UTF-8 buffer to which the control char is written
1566  @param uesBuffer    the four-hex-digits read from the input text
1567  @return ZERO on success, -1 if the four-hex-digit buffer cannot be converted
1568 */
1569 int
AppendUES(wxMemoryBuffer & utf8Buff,const char * uesBuffer)1570 wxJSONReader::AppendUES( wxMemoryBuffer& utf8Buff, const char* uesBuffer )
1571 {
1572     unsigned long l;
1573     int r = sscanf( uesBuffer, "%lx", &l );    // r is the assigned items
1574     if ( r != 1  )  {
1575         AddError( _T( "Invalid Unicode Escaped Sequence"));
1576         return -1;
1577     }
1578     wxLogTrace( traceMask, _T("(%s) unicode sequence=%s code=%ld"),
1579               __PRETTY_FUNCTION__, uesBuffer, l );
1580 
1581     wchar_t ch = (wchar_t) l;
1582     char buffer[16];
1583     size_t len = wxConvUTF8.FromWChar( buffer, 10, &ch, 1 );
1584 
1585     // seems that the wxMBConv classes always appends a NULL byte to
1586     // the converted buffer
1587     if ( len > 1 )    {
1588         len = len - 1;
1589     }
1590     utf8Buff.AppendData( buffer, len );
1591 
1592     // sould never fail
1593     wxASSERT( len != wxCONV_FAILED );
1594     return 0;
1595 }
1596 
1597 //! Store the comment string in the value it refers to.
1598 /*!
 The function searches for a suitable value object in which to store the
 comment line that was read by the parser and temporarily
 stored in \c m_comment.
1602  The function searches the three values pointed to by:
1603  \li \c m_next
1604  \li \c m_current
1605  \li \c m_lastStored
1606 
1607  The value that the comment refers to is:
1608 
1609  \li if the comment is on the same line as one of the values, the comment
1610     refer to that value and it is stored as \b inline.
1611  \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_BEFORE, the comment lines
1612     are stored in the value pointed to by \c m_next
 \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_AFTER, the comment lines
    are stored in the value pointed to by \c m_current or \c m_lastStored
1615 
 Note that the comment line is only stored if the wxJSONREADER_STORE_COMMENTS
 flag was used when the parser object was constructed; otherwise, the
 function does nothing and immediately returns.
1619  Also note that if the comment line has to be stored but the
1620  function cannot find a suitable value to add the comment line to,
1621  an error is reported (note: not a warning but an error).
1622 */
1623 void
StoreComment(const wxJSONValue * parent)1624 wxJSONReader::StoreComment( const wxJSONValue* parent )
1625 {
1626     wxLogTrace( storeTraceMask, _T("(%s) m_comment=%s"),  __PRETTY_FUNCTION__, m_comment.c_str());
1627     wxLogTrace( storeTraceMask, _T("(%s) m_flags=%d m_commentLine=%d"),
1628               __PRETTY_FUNCTION__, m_flags, m_commentLine );
1629     wxLogTrace( storeTraceMask, _T("(%s) m_current=%p"), __PRETTY_FUNCTION__, m_current );
1630     wxLogTrace( storeTraceMask, _T("(%s) m_next=%p"), __PRETTY_FUNCTION__, m_next );
1631     wxLogTrace( storeTraceMask, _T("(%s) m_lastStored=%p"), __PRETTY_FUNCTION__, m_lastStored );
1632 
1633     // first check if the 'store comment' bit is on
1634     if ( (m_flags & wxJSONREADER_STORE_COMMENTS) == 0 )  {
1635         m_comment.clear();
1636         return;
1637     }
1638 
1639     // check if the comment is on the same line of one of the
1640     // 'current', 'next' or 'lastStored' value
1641     if ( m_current != 0 )  {
1642         wxLogTrace( storeTraceMask, _T("(%s) m_current->lineNo=%d"),
1643              __PRETTY_FUNCTION__, m_current->GetLineNo() );
1644         if ( m_current->GetLineNo() == m_commentLine ) {
1645             wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_current\' INLINE"),
1646              __PRETTY_FUNCTION__ );
1647             m_current->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1648             m_comment.clear();
1649             return;
1650         }
1651     }
1652     if ( m_next != 0 )  {
1653         wxLogTrace( storeTraceMask, _T("(%s) m_next->lineNo=%d"),
1654              __PRETTY_FUNCTION__, m_next->GetLineNo() );
1655         if ( m_next->GetLineNo() == m_commentLine ) {
1656             wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_next\' INLINE"),
1657                  __PRETTY_FUNCTION__ );
1658             m_next->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1659             m_comment.clear();
1660             return;
1661         }
1662     }
1663     if ( m_lastStored != 0 )  {
1664         wxLogTrace( storeTraceMask, _T("(%s) m_lastStored->lineNo=%d"),
1665              __PRETTY_FUNCTION__, m_lastStored->GetLineNo() );
1666         if ( m_lastStored->GetLineNo() == m_commentLine ) {
1667             wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_lastStored\' INLINE"),
1668                  __PRETTY_FUNCTION__ );
1669             m_lastStored->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1670             m_comment.clear();
1671             return;
1672         }
1673     }
1674 
1675     // if comment is BEFORE, store the comment in the 'm_next'
1676     // or 'm_current' value
1677     // if comment is AFTER, store the comment in the 'm_lastStored'
1678     // or 'm_current' value
1679 
1680     if ( m_flags & wxJSONREADER_COMMENTS_AFTER )  {  // comment AFTER
1681         if ( m_current )  {
1682             if ( m_current == parent || !m_current->IsValid()) {
1683                 AddError( _T("Cannot find a value for storing the comment (flag AFTER)"));
1684             }
1685             else  {
1686                 wxLogTrace( storeTraceMask, _T("(%s) comment added to m_current (AFTER)"),
1687                      __PRETTY_FUNCTION__ );
1688                 m_current->AddComment( m_comment, wxJSONVALUE_COMMENT_AFTER );
1689             }
1690         }
1691         else if ( m_lastStored )  {
1692             wxLogTrace( storeTraceMask, _T("(%s) comment added to m_lastStored (AFTER)"),
1693                  __PRETTY_FUNCTION__ );
1694             m_lastStored->AddComment( m_comment, wxJSONVALUE_COMMENT_AFTER );
1695         }
1696         else   {
1697             wxLogTrace( storeTraceMask,
1698                 _T("(%s) cannot find a value for storing the AFTER comment"), __PRETTY_FUNCTION__ );
1699             AddError(_T("Cannot find a value for storing the comment (flag AFTER)"));
1700         }
1701     }
1702     else {       // comment BEFORE can only be added to the 'next' value
1703         if ( m_next )  {
1704             wxLogTrace( storeTraceMask, _T("(%s) comment added to m_next (BEFORE)"),
1705                  __PRETTY_FUNCTION__ );
1706             m_next->AddComment( m_comment, wxJSONVALUE_COMMENT_BEFORE );
1707         }
1708         else   {
1709             // cannot find a value for storing the comment
1710             AddError(_T("Cannot find a value for storing the comment (flag BEFORE)"));
1711         }
1712     }
1713     m_comment.clear();
1714 }
1715 
1716 
1717 //! Return the number of bytes that make a character in stream input
1718 /*!
1719  This function returns the number of bytes that represent a unicode
1720  code point in various encoding.
1721  For example, if the input stream is UTF-32 the function returns 4.
1722  Because the only recognized format for streams is UTF-8 the function
1723  just calls UTF8NumBytes() and returns.
1724  The function is, actually, not used at all.
1725 
1726 */
1727 int
NumBytes(char ch)1728 wxJSONReader::NumBytes( char ch )
1729 {
1730     int n = UTF8NumBytes( ch );
1731     return n;
1732 }
1733 
1734 //! Compute the number of bytes that makes a UTF-8 encoded wide character.
1735 /*!
1736  The function counts the number of '1' bit in the character \c ch and
1737  returns it.
1738  The UTF-8 encoding specifies the number of bytes needed by a wide character
1739  by coding it in the first byte. See below.
1740 
1741  Note that if the character does not contain a valid UTF-8 encoding
1742  the function returns -1.
1743 
1744 \code
1745    UCS-4 range (hex.)    UTF-8 octet sequence (binary)
1746    -------------------   -----------------------------
1747    0000 0000-0000 007F   0xxxxxxx
1748    0000 0080-0000 07FF   110xxxxx 10xxxxxx
1749    0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1750    0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1751    0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1752    0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
1753 \endcode
1754 */
1755 int
UTF8NumBytes(char ch)1756 wxJSONReader::UTF8NumBytes( char ch )
1757 {
1758     int num = 0;    // the counter of '1' bits
1759     for ( int i = 0; i < 8; i++ )  {
1760         if ( (ch & 0x80) == 0 )  {
1761             break;
1762         }
1763         ++num;
1764         ch = ch << 1;
1765     }
1766 
1767     // note that if the char contains more than six '1' bits it is not
1768     // a valid UTF-8 encoded character
1769     if ( num > 6 )  {
1770         num = -1;
1771     }
1772     else if ( num == 0 )  {
1773         num = 1;
1774     }
1775     return num;
1776 }
1777 
1778 //! Convert a UTF-8 memory buffer one char at a time
1779 /*!
1780  This function is used in ANSI mode when input from a stream is in UTF-8
1781  format and the UTF-8 buffer read cannot be converted to the locale
1782  wxString object.
1783  The function performs a char-by-char conversion of the buffer and appends
1784  every representable character to the string \c s.
1785  Characters that cannot be represented are stored as \e unicode \e escaped
1786  \e sequences in the form:
1787  \code
1788    \uXXXX
1789  \endcode
1790  where XXXX is a for-hex-digits Unicode code point.
1791  The function returns the number of characters that cannot be represented
1792  in the current locale.
1793 */
1794 int
ConvertCharByChar(wxString & s,const wxMemoryBuffer & utf8Buffer)1795 wxJSONReader::ConvertCharByChar( wxString& s, const wxMemoryBuffer& utf8Buffer )
1796 {
1797     size_t len  = utf8Buffer.GetDataLen();
1798     char*  buff = (char*) utf8Buffer.GetData();
1799     char* buffEnd = buff + len;
1800 
1801     int result = 0;
1802     char temp[16];    // the UTF-8 code-point
1803 
1804     while ( buff < buffEnd )    {
1805         temp[0] = *buff;    // the first UTF-8 code-unit
1806         // compute the number of code-untis that make one UTF-8 code-point
1807         int numBytes = NumBytes( *buff );
1808         ++buff;
1809         for ( int i = 1; i < numBytes; i++ )    {
1810             if ( buff >= buffEnd )    {
1811                 break;
1812             }
1813             temp[i] = *buff;    // the first UTF-8 code-unit
1814             ++buff;
1815         }
1816         //if ( buff >= buffEnd )    {
1817         //    break;
1818         //}
1819         // now convert 'temp' to a wide-character
1820         wchar_t dst[10];
1821         size_t outLength = wxConvUTF8.ToWChar( dst, 10, temp, numBytes );
1822 
1823         // now convert the wide char to a locale dependent character
1824         // len = wxConvLocal.FromWChar( temp, 16, dst, outLength );
1825         // len = wxConviso8859_1.FromWChar( temp, 16, dst, outLength );
1826         len = wxConvLibc.FromWChar( temp, 16, dst, outLength );
1827         if ( len == wxCONV_FAILED )    {
1828             ++result;
1829             wxString t;
1830             t.Printf( _T( "\\u%04X"), (int) dst[0] );
1831             s.Append( t );
1832         }
1833         else    {
1834             s.Append( temp[0], 1 );
1835         }
1836     }        // end while
1837     return result;
1838 }
1839 
1840 //! Read a memory buffer type
1841 /*!
1842  This function is called by DoRead() when the single-quote character is
1843  encontered which starts a \e memory \e buffer type.
1844  This type is a \b wxJSON extension so the function emits a warning
1845  when such a type encontered.
1846  If the reader is constructed without the \c wxJSONREADER_MEMORYBUFF flag
1847  then the warning becomes an error.
1848  To know more about this JSON syntax extension read \ref wxjson_tutorial_memorybuff
1849 
1850  @param is the input stream
1851  @param val the JSON value that will hold the memory buffer value
1852  @return the last char read or -1 in case of EOF
1853 */
1854 
1855 union byte_union
1856 {
1857     unsigned char cu[2];
1858     short int bu;
1859 };
1860 
1861 int
ReadMemoryBuff(wxInputStream & is,wxJSONValue & val)1862 wxJSONReader::ReadMemoryBuff( wxInputStream& is, wxJSONValue& val )
1863 {
1864     static const wxChar* membuffError = _T("the \'memory buffer\' type contains %d invalid digits" );
1865 
1866     AddWarning( wxJSONREADER_MEMORYBUFF, _T( "the \'memory buffer\' type is not valid JSON text" ));
1867 
1868     wxMemoryBuffer buff;
1869     int ch = 0; int errors = 0;
1870     unsigned char byte = 0;
1871     while ( ch >= 0 ) {
1872         ch = ReadChar( is );
1873         if ( ch < 0 )  {
1874             break;
1875         }
1876         if ( ch == '\'' )  {
1877             break;
1878         }
1879         // the conversion is done two chars at a time
1880         unsigned char c1 = (unsigned char) ch;
1881         ch = ReadChar( is );
1882         if ( ch < 0 )  {
1883             break;
1884         }
1885         unsigned char c2 = (unsigned char) ch;
1886         c1 -= '0';
1887         c2 -= '0';
1888         if ( c1 > 9 )  {
1889             c1 -= 7;
1890         }
1891         if ( c2 > 9 )  {
1892             c2 -= 7;
1893         }
1894         if ( c1 > 15 )  {
1895             ++errors;
1896         }
1897         else if ( c2 > 15 )  {
1898             ++errors;
1899         }
1900         else {
1901             byte = (c1 * 16) + c2;
1902             buff.AppendByte( byte );
1903         }
1904     }   // end while
1905 
1906     if ( errors > 0 )  {
1907         wxString err;
1908         err.Printf( membuffError, errors );
1909         AddError( err );
1910     }
1911 
1912 
1913     // now assign the memory buffer object to the JSON-value 'value'
1914     // must check that:
1915     //   'value'  is invalid OR
1916     //   'value'  is a memory buffer; concatenate it
1917     if ( !val.IsValid() )   {
1918         wxLogTrace( traceMask, _T("(%s) assigning the memory buffer to value"), __PRETTY_FUNCTION__ );
1919         val = buff ;
1920     }
1921     else if ( val.IsMemoryBuff() )  {
1922         wxLogTrace( traceMask, _T("(%s) concatenate memory buffer to value"), __PRETTY_FUNCTION__ );
1923         val.Cat( buff );
1924     }
1925     else  {
1926         AddError( _T( "Memory buffer value cannot follow another value") );
1927     }
1928 
1929     // store the input text's line number when the string was stored in 'val'
1930     val.SetLineNo( m_lineNo );
1931 
1932     // read the next char after the closing quotes and returns it
1933     if ( ch >= 0 )  {
1934         ch = ReadChar( is );
1935     }
1936     return ch;
1937 }
1938 
1939 
1940 
1941 
1942 #if defined( wxJSON_64BIT_INT )
1943 //! Converts a decimal string to a 64-bit signed integer
1944 /*!
1945  This function implements a simple variant
1946  of the \b strtoll C-library function.
1947  I needed this implementation because the wxString::To(U)LongLong
1948  function does not work on my system:
1949 
1950   \li GNU/Linux Fedora Core 6
1951   \li GCC version 4.1.1
1952   \li libc.so.6
1953 
1954  The wxWidgets library (actually I have installed version 2.8.7)
1955  relies on \b strtoll in order to do the conversion from a string
1956  to a long long integer but, in fact, it does not work because
1957  the 'wxHAS_STRTOLL' macro is not defined on my system.
1958  The problem only affects the Unicode builds while it seems
1959  that the wxString::To(U)LongLong function works in ANSI builds.
1960 
1961  Note that this implementation is not a complete substitute of the
1962  strtoll function because it only converts decimal strings (only base
1963  10 is implemented).
1964 
1965  @param str the string that contains the decimal literal
1966  @param i64 the pointer to long long which holds the converted value
1967 
1968  @return TRUE if the conversion succeeds
1969 */
1970 bool
Strtoll(const wxString & str,wxInt64 * i64)1971 wxJSONReader::Strtoll( const wxString& str, wxInt64* i64 )
1972 {
1973     wxChar sign = ' ';
1974     wxUint64 ui64;
1975     bool r = DoStrto_ll( str, &ui64, &sign );
1976 
1977     if ( r) {
1978         // check overflow for signed long long
1979         switch ( sign )  {
1980             case '-' :
1981                 if ( ui64 > (wxUint64) LLONG_MAX + 1 )  {
1982                     r = false;
1983                 }
1984                 else  {
1985                     *i64 = (wxInt64) (ui64 * -1);
1986                 }
1987                 break;
1988 
1989             // case '+' :
1990             default :
1991                 if ( ui64 > LLONG_MAX )  {
1992                     r = false;
1993                 }
1994                 else  {
1995                     *i64 = (wxInt64) ui64;
1996                 }
1997                 break;
1998         }
1999     }
2000     return r;
2001 }
2002 
2003 
2004 //! Converts a decimal string to a 64-bit unsigned integer.
2005 /*!
2006  Similar to \c Strtoll but for unsigned integers
2007 */
2008 bool
Strtoull(const wxString & str,wxUint64 * ui64)2009 wxJSONReader::Strtoull( const wxString& str, wxUint64* ui64 )
2010 {
2011     wxChar sign = ' ';
2012     bool r = DoStrto_ll( str, ui64, &sign );
2013     if ( sign == '-' )  {
2014         r = false;
2015     }
2016     return r;
2017 }
2018 
2019 //! Perform the actual conversion from a string to a 64-bit integer
2020 /*!
2021  This function is called internally by the \c Strtoll and \c Strtoull functions
2022  and it does the actual conversion.
2023  The function is also able to check numeric overflow.
2024 
2025  @param str the string that has to be converted
2026  @param ui64 the pointer to a unsigned long long that holds the converted value
2027  @param sign the pointer to a wxChar character that will get the sign of the literal string, if any
2028  @return TRUE if the conversion succeeds
2029 */
2030 bool
DoStrto_ll(const wxString & str,wxUint64 * ui64,wxChar * sign)2031 wxJSONReader::DoStrto_ll( const wxString& str, wxUint64* ui64, wxChar* sign )
2032 {
2033   // the conversion is done by multiplying the individual digits
2034   // in reverse order to the corresponding power of 10
2035   //
2036   //  10's power:  987654321.9876543210
2037   //
2038   // LLONG_MAX:     9223372036854775807
2039   // LLONG_MIN:    -9223372036854775808
2040   // ULLONG_MAX:   18446744073709551615
2041   //
2042   // the function does not take into account the sign: only a
2043   // unsigned long long int is returned
2044 
2045     int maxDigits = 20;       // 20 + 1 (for the sign)
2046 
2047     wxUint64 power10[] = {
2048     wxULL(1),
2049     wxULL(10),
2050     wxULL(100),
2051     wxULL(1000),
2052     wxULL(10000),
2053     wxULL(100000),
2054     wxULL(1000000),
2055     wxULL(10000000),
2056     wxULL(100000000),
2057     wxULL(1000000000),
2058     wxULL(10000000000),
2059     wxULL(100000000000),
2060     wxULL(1000000000000),
2061     wxULL(10000000000000),
2062     wxULL(100000000000000),
2063     wxULL(1000000000000000),
2064     wxULL(10000000000000000),
2065     wxULL(100000000000000000),
2066     wxULL(1000000000000000000),
2067     wxULL(10000000000000000000)
2068   };
2069 
2070 
2071     wxUint64 temp1 = wxULL(0);   // the temporary converted integer
2072 
2073     int strLen = str.length();
2074     if ( strLen == 0 )  {
2075         // an empty string is converted to a ZERO value: the function succeeds
2076         *ui64 = wxLL(0);
2077         return true;
2078     }
2079 
2080     int index = 0;
2081     wxChar ch = str[0];
2082     if ( ch == '+' || ch == '-' )  {
2083         *sign = ch;
2084         ++index;
2085         ++maxDigits;
2086     }
2087 
2088     if ( strLen > maxDigits )  {
2089         return false;
2090     }
2091 
2092     // check the overflow: check the string length and the individual digits
2093     // of the string; the overflow is checked for unsigned long long
2094     if ( strLen == maxDigits )  {
2095         wxString uLongMax( _T("18446744073709551615"));
2096         int j = 0;
2097         for ( int i = index; i < strLen - 1; i++ )  {
2098             ch = str[i];
2099             if ( ch < '0' || ch > '9' ) {
2100                 return false;
2101             }
2102             if ( ch > uLongMax[j] ) {
2103                 return false;
2104             }
2105             if ( ch < uLongMax[j] ) {
2106                 break;
2107             }
2108             ++j;
2109         }
2110     }
2111 
2112     // get the digits in the reverse order and multiply them by the
2113     // corresponding power of 10
2114     int exponent = 0;
2115     for ( int i = strLen - 1; i >= index; i-- )   {
2116         wxChar ch = str[i];
2117         if ( ch < '0' || ch > '9' ) {
2118             return false;
2119         }
2120         ch = ch - '0';
2121         // compute the new temporary value
2122         temp1 += ch * power10[exponent];
2123         ++exponent;
2124     }
2125     *ui64 = temp1;
2126     return true;
2127 }
2128 
2129 #endif       // defined( wxJSON_64BIT_INT )
2130 
2131 /*
2132 {
2133 }
2134 */
2135 
2136 
2137 
2138