1 /////////////////////////////////////////////////////////////////////////////
2 // Name:        jsonreader.cpp
3 // Purpose:     the wxJSONReader class: a JSON text parser
4 // Author:      Luciano Cattani
5 // Created:     2007/10/14
6 // RCS-ID:      $Id: jsonreader.cpp,v 1.12 2008/03/12 10:48:19 luccat Exp $
7 // Copyright:   (c) 2007 Luciano Cattani
8 // Licence:     wxWidgets licence
9 /////////////////////////////////////////////////////////////////////////////
10 
11 #ifdef __GNUG__
12     #pragma implementation "jsonreader.cpp"
13 #endif
14 
15 #include "jsonreader.h"
16 
17 #include <wx/mstream.h>
18 #include <wx/sstream.h>
19 #include <wx/debug.h>
20 #include <wx/log.h>
21 
22 
23 
24 /*! \class wxJSONReader
25  \brief The JSON parser
26 
27  The class is a JSON parser which reads a JSON formatted text and stores
28  values in the \c wxJSONValue structure.
29  The ctor accepts two parameters: the \e style flag, which controls how
30  much error-tolerant should the parser be and an integer which is
31  the maximum number of errors and warnings that have to be reported
32  (the default is 30).
33 
34  If the JSON text document does not contain an open/close JSON character the
35  function returns an \b invalid value object; in other words, the
36  wxJSONValue::IsValid() function returns FALSE.
37  This is the case of a document that is empty or contains only
38  whitespaces or comments.
 If the document contains a starting object/array character immediately
40  followed by a closing object/array character
41  (i.e.: \c {}) then the function returns an \b empty array or object
42  JSON value.
43  This is a valid JSON object of type wxJSONTYPE_OBJECT or wxJSONTYPE_ARRAY
44  whose wxJSONValue::Size() function returns ZERO.
45 
46  \par JSON text
47 
 The wxJSON parser just skips all characters read from the
 input JSON text until the start-object '{' or start-array '[' characters
 are encountered (see the GetStart() function).
 This means that the JSON input text may contain anything
 before the first start-object/array character except these two chars themselves
 unless they are included in a C/C++ comment.
 Comment lines that appear before the first start array/object character
 are not ignored if the parser is constructed with the wxJSONREADER_STORE_COMMENTS
 flag: they are added to the comment's array of the root JSON value.
57 
58  Note that the parsing process stops when the internal DoRead() function
59  returns. Because that function is recursive, the top-level close-object
60  '}' or close-array ']' character cause the top-level DoRead() function
61  to return thus stopping the parsing process regardless the EOF condition.
62  This means that the JSON input text may contain anything \b after
63  the top-level close-object/array character.
64  Here are some examples:
65 
66  Returns a wxJSONTYPE_INVALID value (invalid JSON value)
67  \code
68    // this text does not contain an open array/object character
69  \endcode
70 
71  Returns a wxJSONTYPE_OBJECT value of Size() = 0
72  \code
73    {
74    }
75  \endcode
76 
77  Returns a wxJSONTYPE_ARRAY value of Size() = 0
78  \code
79    [
80    ]
81  \endcode
82 
83  Text before and after the top-level open/close characters is ignored.
84  \code
85    This non-JSON text does not cause the parser to report errors or warnings
86    {
87    }
88    This non-JSON text does not cause the parser to report errors or warnings
89  \endcode
90 
91 
92  \par Extensions
93 
94  The wxJSON parser recognizes all JSON text plus some extensions
95  that are not part of the JSON syntax but that many other JSON
96  implementations do recognize.
97  If the input text contains the following non-JSON text, the parser
98  reports the situation as \e warnings and not as \e errors unless
99  the parser object was constructed with the wxJSONREADER_STRICT
100  flag. In the latter case the wxJSON parser is not tolerant.
101 
102  \li C/C++ comments: the parser recognizes C and C++ comments.
103     Comments can optionally be stored in the value they refer
104     to and can also be written back to the JSON text document.
105     To know more about comment storage see \ref wxjson_comments
106 
107  \li case tolerance: JSON syntax states that the literals \c null,
108     \c true and \c false must be lowercase; the wxJSON parser
109     also recognizes mixed case literals such as, for example,
110     \b Null or \b FaLSe.  A \e warning is emitted.
111 
 \li wrong or missing closing character: wxJSON parser is tolerant
    about the object / array closing character. When an open-array
    character '[' is encountered, the parser expects the
    corresponding close-array character ']'. If the character
    encountered is a close-object char '}' a warning is reported.
    A warning is also reported if the character is missing when
    the end-of-file is reached.
119 
 \li multi-line strings: this feature allows a JSON string type to be
    split in two or more lines as in the standard C/C++
    languages. The drawback is that this feature is error-prone
    and you have to use it with care.
124     For more info about this topic read \ref wxjson_tutorial_style_split
125 
126  Note that you can control how much error-tolerant should the parser be
127  and also you can specify how many and what extensions are recognized.
128  See the constructor's parameters for more details.
129 
130  \par Unicode vs ANSI
131 
132  The parser can read JSON text from two very different kind of objects:
133 
134  \li a string object (\b wxString)
135  \li a stream object (\b wxInputStream)
136 
 When the input is from a string object, the character represented in the
 string is platform- and mode-dependent; in other words, characters are
 represented differently: in ANSI builds they depend on the charset in use
 and in Unicode builds they depend on the platform (UCS-2 on win32, UCS-4
 or UTF-8 on GNU/Linux).
142 
143  When the input is from a stream object, the only recognized encoding format
144  is UTF-8 for both ANSI and Unicode builds.
145 
146  \par Example:
147 
148  \code
149   wxJSONValue  value;
150   wxJSONReader reader;
151 
152   // open a text file that contains the UTF-8 encoded JSON text
153   wxFFileInputStream jsonText(_T("filename.utf8"), _T("r"));
154 
155   // read the file
156   int numErrors = reader.Parse(jsonText, &value);
157 
158   if (numErrors > 0)  {
159     ::MessageBox(_T("Error reading the input file"));
160   }
161  \endcode
162 
 Starting from version 1.1.0 the wxJSON reader and the writer have changed in
 their internal organization.
165  To know more about ANSI and Unicode mode read \ref wxjson_tutorial_unicode.
166 */
167 
168 
169 
170 // if you have the debug build of wxWidgets and wxJSON you can see
171 // trace messages by setting the:
172 // WXTRACE=traceReader StoreComment
173 // environment variable
#if defined(JSONDEBUG)
// trace masks passed to wxLogTrace() by this file; only compiled in when
// JSONDEBUG is defined
static const wxChar* traceMask = _T("traceReader");
// NOTE(review): not referenced in the part of the file reviewed here;
// presumably used by the comment-storage code - confirm
static const wxChar* storeTraceMask = _T("StoreComment");
#endif
178 
179 
180 //! Ctor
181 /*!
182  Construct a JSON parser object with the given parameters.
183 
184  JSON parser objects should always be constructed on the stack but
185  it does not hurt to have a global JSON parser.
186 
 \param flags this parameter controls how error-tolerant the parser
        should be
189 
190  \param maxErrors the maximum number of errors (and warnings, too) that are
191     reported by the parser. When the number of errors reaches this limit,
192     the parser stops to read the JSON input text and no other error is
193     reported.
194 
 The \c flags parameter is the combination of ZERO or more of the
 following constants OR'ed together:
197 
198  \li wxJSONREADER_ALLOW_COMMENTS: C/C++ comments are recognized by the
199      parser; a warning is reported by the parser
200  \li wxJSONREADER_STORE_COMMENTS: C/C++ comments, if recognized, are
201      stored in the value they refer to and can be rewritten back to
202      the JSON text
203  \li wxJSONREADER_CASE: the parser recognizes mixed-case literal strings
204  \li wxJSONREADER_MISSING: the parser allows missing or wrong close-object
205      and close-array characters
 \li wxJSONREADER_MULTISTRING: strings may be split in two or more
     lines
 \li wxJSONREADER_COMMENTS_AFTER: if STORE_COMMENTS is defined, the parser
     assumes that comment lines appear \b before the value they
     refer to unless this constant is specified. In the latter case,
     comments appear \b after the value they refer to.
212  \li wxJSONREADER_NOUTF8_STREAM: suppress UTF-8 conversion when reading a
213          string value from a stream: the reader assumes that the input stream
214          is encoded in ANSI format and not in UTF-8; only meaningfull in ANSI
215          builds, this flag is simply ignored in Unicode builds.
216 
217  You can also use the following shortcuts to specify some predefined
218  flag's combinations:
219 
220   \li wxJSONREADER_STRICT: all wxJSON extensions are reported as errors, this
221       is the same as specifying a ZERO value as \c flags.
222   \li wxJSONREADER_TOLERANT: this is the same as ALLOW_COMMENTS | CASE |
223       MISSING | MULTISTRING; all wxJSON extensions are turned on but comments
224       are not stored in the value objects.
225 
226  \par Example:
227 
 The following code fragment constructs a JSON parser, turns on all
 wxJSON extensions and also stores C/C++ comments in the value object
 they refer to. The parser assumes that the comments appear before the
 value:
232 
233  \code
234    wxJSONReader reader(wxJSONREADER_TOLERANT | wxJSONREADER_STORE_COMMENTS);
235    wxJSONValue  root;
236    int numErrors = reader.Parse(jsonText, &root);
237  \endcode
238 */
wxJSONReader(int flags,int maxErrors)239 wxJSONReader::wxJSONReader(int flags, int maxErrors) {
240     m_flags     = flags;
241     m_maxErrors = maxErrors;
242     m_noUtf8    = false;
243 #if !defined(wxJSON_USE_UNICODE)
244     // in ANSI builds we can suppress UTF-8 conversion for both the writer and the reader
245     if (m_flags & wxJSONREADER_NOUTF8_STREAM)    {
246         m_noUtf8 = true;
247     }
248 #endif
249 }
250 
251 //! Dtor - does nothing
~wxJSONReader()252 wxJSONReader::~wxJSONReader() {
253 }
254 
255 //! Parse the JSON document.
256 /*!
257  The two overloaded versions of the \c Parse() function read a
258  JSON text stored in a wxString object or in a wxInputStream
259  object.
260 
261  If \c val is a NULL pointer, the function does not store the
262  values: it can be used as a JSON checker in order to check the
263  syntax of the document.
264  Returns the number of \b errors found in the document.
265  If the returned value is ZERO and the parser was constructed
266  with the \c wxJSONREADER_STRICT flag, then the parsed document
267  is \e well-formed and it only contains valid JSON text.
268 
269  If the \c wxJSONREADER_TOLERANT flag was used in the parser's
270  constructor, then a return value of ZERO
271  does not mean that the document is \e well-formed because it may
272  contain comments and other extensions that are not fatal for the
273  wxJSON parser but other parsers may fail to recognize.
274  You can use the \c GetWarningCount() function to know how many
275  wxJSON extensions are present in the JSON input text.
276 
277  Note that the JSON value object \c val is not cleared by this
278  function unless its type is of the wrong type.
279  In other words, if \c val is of type wxJSONTYPE_ARRAY and it already
280  contains 10 elements and the input document starts with a
281  '[' (open-array char) then the elements read from the document are
282  \b appended to the existing ones.
283 
284  On the other hand, if the text document starts with a '{' (open-object) char
285  then this function must change the type of the \c val object to
286  \c wxJSONTYPE_OBJECT and the old content of 10 array elements will be lost.
287 
288  \par Different input types
289 
 The real parsing process is done using UTF-8 streams. If the input is
 from a \b wxString object, the Parse function first converts the input string
 into a temporary \b wxMemoryInputStream which contains the UTF-8 conversion
 of the string itself.
294  Next, the overloaded Parse function is called.
295 
296  @param doc    the JSON text that has to be parsed
 @param val    the wxJSONValue object that contains the parsed text; if NULL the
         parser does not store anything but errors and warnings are reported
 @return the total number of errors encountered
300 */
301 int
Parse(const wxString & doc,wxJSONValue * val)302 wxJSONReader:: Parse(const wxString& doc, wxJSONValue* val) {
303 #if !defined(wxJSON_USE_UNICODE)
304     // in ANSI builds input from a string never use UTF-8 conversion
305     bool noUtf8_bak = m_noUtf8;        // save the current setting
306     m_noUtf8 = true;
307 #endif
308 
309     // convert the string to a UTF-8 / ANSI memory stream and calls overloaded Parse()
310     char* readBuff = 0;
311     wxCharBuffer utf8CB = doc.ToUTF8();        // the UTF-8 buffer
312 #if !defined(wxJSON_USE_UNICODE)
313     wxCharBuffer ansiCB(doc.c_str());        // the ANSI buffer
314     if (m_noUtf8)    {
315         readBuff = ansiCB.data();
316     } else {
317         readBuff = utf8CB.data();
318     }
319 #else
320         readBuff = utf8CB.data();
321 #endif
322 
323     // now construct the temporary memory input stream
324     size_t len = strlen(readBuff);
325     wxMemoryInputStream is(readBuff, len);
326 
327     int numErr = Parse(is, val);
328 #if !defined(wxJSON_USE_UNICODE)
329     m_noUtf8 = noUtf8_bak;
330 #endif
331     return numErr;
332 }
333 
334 //! \overload Parse(const wxString&, wxJSONValue*)
335 int
Parse(wxInputStream & is,wxJSONValue * val)336 wxJSONReader::Parse(wxInputStream& is, wxJSONValue* val) {
337     // if val == 0 the 'temp' JSON value will be passed to DoRead()
338     wxJSONValue temp;
339     m_level    = 0;
340     m_depth    = 0;
341     m_lineNo   = 1;
342     m_colNo    = 1;
343     m_peekChar = -1;
344     m_errors.clear();
345     m_warnings.clear();
346 
347     // if a wxJSONValue is not passed to the Parse function
348     // we set the temparary object created on the stack
349     // I know this will slow down the validation of input
350     if (val == 0)  {
351         val = &temp;
352     }
353     wxASSERT(val);
354 
355     // set the wxJSONValue object's pointers for comment storage
356     m_next       = val;
357     m_next->SetLineNo(-1);
358     m_lastStored = 0;
359     m_current    = 0;
360 
361     int ch = GetStart(is);
362     switch (ch)  {
363         case '{' :
364         val->SetType(wxJSONTYPE_OBJECT);
365         break;
366     case '[' :
367         val->SetType(wxJSONTYPE_ARRAY);
368         break;
369     default :
370         AddError(_T("Cannot find a start object/array character"));
371         return m_errors.size();
372         break;
373     }
374 
375     // returning from DoRead() could be for EOF or for
376     // the closing array-object character
377     // if -1 is returned, it is as an error because the lack
378     // of close-object/array characters
379     // note that the missing close-chars error messages are
380     // added by the DoRead() function
381     ch = DoRead(is, *val);
382     return m_errors.size();
383 }
384 
385 
386 //! Returns the start of the document
387 /*!
388  This is the first function called by the Parse() function and it searches
389  the input stream for the starting character of a JSON text and returns it.
390  JSON text start with '{' or '['.
391  If the two starting characters are inside a C/C++ comment, they
392  are ignored.
393  Returns the JSON-text start character or -1 on EOF.
394 
395  @param is    the input stream that contains the JSON text
396  @return -1 on errors or EOF; one of '{' or '['
397 */
398 int
GetStart(wxInputStream & is)399 wxJSONReader::GetStart(wxInputStream& is) {
400     int ch = 0;
401     do  {
402         switch (ch)  {
403             case 0 :
404                 ch = ReadChar(is);
405                 break;
406             case '{' :
407                 return ch;
408                 break;
409             case '[' :
410                 return ch;
411                 break;
412             case '/' :
413                 ch = SkipComment(is);
414                 StoreComment(0);
415                 break;
416             default :
417                 ch = ReadChar(is);
418                 break;
419         }
420     } while (ch >= 0);
421     return ch;
422 }
423 
424 //! Return a reference to the error message's array.
425 const wxArrayString&
GetErrors() const426 wxJSONReader::GetErrors() const {
427     return m_errors;
428 }
429 
430 //! Return a reference to the warning message's array.
431 const wxArrayString&
GetWarnings() const432 wxJSONReader::GetWarnings() const {
433     return m_warnings;
434 }
435 
436 //! Return the depth of the JSON input text
437 /*!
438  The function returns the number of times the recursive \c DoRead function was
439  called in the parsing process thus returning the maximum depth of the JSON
440  input text.
441 */
442 int
GetDepth() const443 wxJSONReader::GetDepth() const {
444     return m_depth;
445 }
446 
447 
448 
449 //! Return the size of the error message's array.
450 int
GetErrorCount() const451 wxJSONReader::GetErrorCount() const {
452     return m_errors.size();
453 }
454 
455 //! Return the size of the warning message's array.
456 int
GetWarningCount() const457 wxJSONReader::GetWarningCount() const {
458     return m_warnings.size();
459 }
460 
461 
462 //! Read a character from the input JSON document.
463 /*!
464  The function returns the next byte from the UTF-8 stream as an INT.
465  In case of errors or EOF, the function returns -1.
466  The function also updates the \c m_lineNo and \c m_colNo data
467  members and converts all CR+LF sequence in LF.
468 
469  This function only returns one byte UTF-8 (one code unit)
470  at a time and not Unicode code points.
471  The only reason for this function is to process line and column
472  numbers.
473 
474  @param is    the input stream that contains the JSON text
475  @return the next char (one single byte) in the input stream or -1 on error or EOF
476 */
477 int
ReadChar(wxInputStream & is)478 wxJSONReader::ReadChar(wxInputStream& is) {
479     if (is.Eof())    {
480         return -1;
481     }
482 
483     unsigned char ch = is.GetC();
484     size_t last = is.LastRead();    // returns ZERO if EOF
485     if (last == 0)    {
486         return -1;
487     }
488 
489     // the function also converts CR in LF. only LF is returned
490     // in the case of CR+LF
491     int nextChar;
492 
493     if (ch == '\r')  {
494         m_colNo = 1;
495         nextChar = PeekChar(is);
496         if (nextChar == -1)  {
497             return -1;
498         } else if (nextChar == '\n')    {
499             ch = is.GetC();
500         }
501     }
502     if (ch == '\n')  {
503         ++m_lineNo;
504         m_colNo = 1;
505     } else  {
506         ++m_colNo;
507     }
508     return static_cast<int>(ch);
509 }
510 
511 
512 //! Peek a character from the input JSON document
513 /*!
514  This function just calls the \b Peek() function on the stream
515  and returns it.
516 
517  @param is    the input stream that contains the JSON text
518  @return the next char (one single byte) in the input stream or -1 on error or EOF
519 */
520 int
PeekChar(wxInputStream & is)521 wxJSONReader::PeekChar(wxInputStream& is) {
522     int ch = -1; unsigned char c;
523     if (!is.Eof())    {
524         c = is.Peek();
525         ch = c;
526     }
527     return ch;
528 }
529 
530 
531 //! Reads the JSON text document (internal use)
532 /*!
 This is a recursive function that is called by \c Parse()
 and by the \c DoRead() function itself when a new object /
 array character is encountered.
 The function returns when an EOF condition is encountered or
 when the corresponding close-object / close-array char is encountered.
538  The function also increments the \c m_level
539  data member when it is entered and decrements it on return.
540  It also sets \c m_depth equal to \c m_level if \c m_depth is
541  less than \c m_level.
542 
543  The function is the heart of the wxJSON parser class but it is
544  also very easy to understand because JSON syntax is very
545  easy.
546 
547  Returns the last close-object/array character read or -1 on EOF.
548 
549  @param is    the input stream that contains the JSON text
550  @param parent the JSON value object that is the parent of all subobjects
551          read by the function until the next close-object/array (for
552          the top-level \c DoRead function \c parent is the root JSON object)
553  @return one of close-array or close-object char or -1 on error or EOF
554 */
555 int
DoRead(wxInputStream & is,wxJSONValue & parent)556 wxJSONReader::DoRead(wxInputStream& is, wxJSONValue& parent) {
557     ++m_level;
558     if (m_depth < m_level)    {
559         m_depth = m_level;
560     }
561 
562     // 'value' is the wxJSONValue structure that has to be
563     // read. Data read from the JSON text input is stored
564     // in the following object.
565     wxJSONValue value(wxJSONTYPE_INVALID);
566 
567     // sets the pointers to the current, next and last-stored objects
568     // in order to determine the value to which a comment refers to
569     m_next = &value;
570     m_current = &parent;
571     m_current->SetLineNo(m_lineNo);
572     m_lastStored = 0;
573 
574     // the 'key' string is stored from 'value' when a ':' is encontered
575     wxString  key;
576 
577     // the character read: -1=EOF, 0=to be read
578     int ch = 0;
579 
580     do {                   // we read until ch < 0
581         switch (ch)  {
582             case 0 :
583                 ch = ReadChar(is);
584                 break;
585             case ' ' :
586             case '\t' :
587             case '\n' :
588             case '\r' :
589                 ch = SkipWhiteSpace(is);
590                 break;
591             case -1 :   // the EOF
592                 break;
593             case '/' :
594                 ch = SkipComment(is);
595                 StoreComment(&parent);
596                 break;
597 
598             case '{' :
599                 if (parent.IsObject()) {
600                     if (key.empty())   {
601                         AddError(_T("\'{\' is not allowed here (\'name\' is missing"));
602                     }
603                     if (value.IsValid())   {
604                         AddError(_T("\'{\' cannot follow a \'value\'"));
605                           }
606                 } else if (parent.IsArray())  {
607                     if (value.IsValid())   {
608                         AddError(_T("\'{\' cannot follow a \'value\' in JSON array"));
609                     }
610                 } else  {
611                     wxJSON_ASSERT(0);       // always fails
612                 }
613 
614                 // the openobject char cause the DoRead() to be called recursively
615                 value.SetType(wxJSONTYPE_OBJECT);
616                 ch = DoRead(is, value);
617                 break;
618 
619             case '}' :
620                 if (!parent.IsObject())  {
621                     AddWarning(wxJSONREADER_MISSING,
622                     _T("Trying to close an array using the \'}\' (close-object) char"));
623                 }
624                 // close-object: store the current value, if any
625                 StoreValue(ch, key, value, parent);
626                 m_current = &parent;
627                 m_next    = 0;
628                 m_current->SetLineNo(m_lineNo);
629                 ch = ReadChar(is);
630                 return ch;
631                 break;
632 
633             case '[' :
634                 if (parent.IsObject()) {
635                     if (key.empty())   {
636                         AddError(_T("\'[\' is not allowed here (\'name\' is missing"));
637                     }
638                     if (value.IsValid())   {
639                         AddError(_T("\'[\' cannot follow a \'value\' text"));
640                     }
641                 } else if (parent.IsArray())  {
642                     if (value.IsValid())   {
643                         AddError(_T("\'[\' cannot follow a \'value\'"));
644                     }
645                 } else  {
646                     wxJSON_ASSERT(0);       // always fails
647                 }
648                 // open-array cause the DoRead() to be called recursively
649                 value.SetType(wxJSONTYPE_ARRAY);
650                 ch = DoRead(is, value);
651                 break;
652 
653             case ']' :
654                 if (!parent.IsArray())  {
655                     // wrong close-array char (should be close-object)
656                     AddWarning(wxJSONREADER_MISSING,
657                     _T("Trying to close an object using the \']\' (close-array) char"));
658                 }
659                 StoreValue(ch, key, value, parent);
660                 m_current = &parent;
661                 m_next    = 0;
662                 m_current->SetLineNo(m_lineNo);
663                 return 0;   // returning ZERO for reading the next char
664                 break;
665 
666             case ',' :
667                 // store the value, if any
668                 StoreValue(ch, key, value, parent);
669                 key.clear();
670                 ch = ReadChar(is);
671                 break;
672 
673             case '\"' :
674                 ch = ReadString(is, value);     // read a JSON string type
675                 m_current = &value;
676                 m_next    = 0;
677                 break;
678 
679             case '\'' :
680                 ch = ReadMemoryBuff(is, value);  // read a memory buffer type
681                 m_current = &value;
682                 m_next    = 0;
683                 break;
684 
685             case ':' :   // key / value separator
686                 m_current = &value;
687                 m_current->SetLineNo(m_lineNo);
688                 m_next    = 0;
689                 if (!parent.IsObject())  {
690                     AddError(_T("\':\' can only used in object's values"));
691                 } else if (!value.IsString())  {
692                     AddError(_T("\':\' follows a value which is not of type \'string\'"));
693                 } else if (!key.empty())  {
694                     AddError(_T("\':\' not allowed where a \'name\' string was already available"));
695                 } else {
696                     // the string in 'value' is set as the 'key'
697                     key = value.AsString();
698                     value.SetType(wxJSONTYPE_INVALID);
699                 }
700                 ch = ReadChar(is);
701                 break;
702 
703             default :
704                 // no special char: it is a literal or a number
705                 // errors are checked in the 'ReadValue()' function.
706                 m_current = &value;
707                 m_current->SetLineNo(m_lineNo);
708                 m_next    = 0;
709                 ch = ReadValue(is, ch, value);
710                 break;
711         } // end switch
712     } while (ch >= 0);
713 
714     // the DoRead() should return when the close-object/array char is encontered
715     // if we are here, the EOF condition was encontered so one or more close-something
716     // characters are missing
717     if (parent.IsArray())  {
718         AddWarning(wxJSONREADER_MISSING, _T("\']\' missing at end of file"));
719     } else if (parent.IsObject())  {
720         AddWarning(wxJSONREADER_MISSING, _T("\'}\' missing at end of file"));
721     } else {
722         wxJSON_ASSERT(0);
723     }
724 
725     // we store the value, as there is a missing close-object/array char
726     StoreValue(ch, key, value, parent);
727 
728     --m_level;
729     return ch;
730 }
731 
732 //! Store a value in the parent object.
733 /*!
 The function is called by \c DoRead() when a comma
 or a close-object/array character is encountered and stores the current
 value read by the parser in the parent object.
 The function checks that \c value is not invalid and that \c key is
 not an empty string if \c parent is an object.

 \param ch    the character read: a comma or close object/array char
741  \param key    the \b key string: must be empty if \c parent is an array
742  \param value    the current JSON value to be stored in \c parent
743  \param parent    the JSON value that is the parent of \c value.
744  \return none
745 */
746 void
StoreValue(int ch,const wxString & key,wxJSONValue & value,wxJSONValue & parent)747 wxJSONReader::StoreValue(int ch, const wxString& key, wxJSONValue& value, wxJSONValue& parent) {
748     // if 'ch' == } or ] than value AND key may be empty when a open object/array
749     // is immediatly followed by a close object/array
750     //
751     // if 'ch' == , (comma) value AND key (for TypeMap) cannot be empty
752     //
753 #if defined(JSONDEBUG)
754     wxLogTrace(traceMask, _T("(%s) ch=%d char=%c"), __PRETTY_FUNCTION__, ch, static_cast<char>(ch));
755     wxLogTrace(traceMask, _T("(%s) value=%s"), __PRETTY_FUNCTION__, value.AsString().c_str());
756 #endif
757 
758     m_current = 0;
759     m_next    = &value;
760     m_lastStored = 0;
761     m_next->SetLineNo(-1);
762 
763     if (!value.IsValid() && key.empty()) {
764         // OK, if the char read is a close-object or close-array
765         if (ch == '}' || ch == ']')  {
766             m_lastStored = 0;
767 #if defined(JSONDEBUG)
768             wxLogTrace(traceMask, _T("(%s) key and value are empty, returning"),
769                              __PRETTY_FUNCTION__);
770 #endif
771         } else {
772             AddError(_T("key or value is missing for JSON value"));
773         }
774     } else {
775         // key or value are not empty
776         if (parent.IsObject())  {
777             if (!value.IsValid()) {
778                 AddError(_T("cannot store the value: \'value\' is missing for JSON object type"));
779              } else if (key.empty()) {
780                 AddError(_T("cannot store the value: \'key\' is missing for JSON object type"));
781             } else {
782                 // OK, adding the value to parent key/value map
783 #if defined(JSONDEBUG)
784                 wxLogTrace(traceMask, _T("(%s) adding value to key:%s"),
785                      __PRETTY_FUNCTION__, key.c_str());
786 #endif
787                 parent[key] = value;
788                 m_lastStored = &(parent[key]);
789                 m_lastStored->SetLineNo(m_lineNo);
790             }
791         } else if (parent.IsArray()) {
792             if (!value.IsValid()) {
793                     AddError(_T("cannot store the item: \'value\' is missing for JSON array type"));
794             }
795             if (!key.empty()) {
796                 AddError(_T("cannot store the item: \'key\' (\'%s\') is not permitted in JSON array type"), key);
797             }
798 #if defined(JSONDEBUG)
799             wxLogTrace(traceMask, _T("(%s) appending value to parent array"),
800                                  __PRETTY_FUNCTION__);
801 #endif
802             parent.Append(value);
803             const wxJSONInternalArray* arr = parent.AsArray();
804             wxJSON_ASSERT(arr);
805             m_lastStored = &(arr->Last());
806             m_lastStored->SetLineNo(m_lineNo);
807         } else {
808             wxJSON_ASSERT(0);  // should never happen
809         }
810     }
811     value.SetType(wxJSONTYPE_INVALID);
812     value.ClearComments();
813 }
814 
815 //! Add a error message to the error's array
816 /*!
817  The overloaded versions of this function add an error message to the
818  error's array stored in \c m_errors.
819  The error message is formatted as follows:
820 
821  \code
822    Error: line xxx, col xxx - <error_description>
823  \endcode
824 
825  The \c msg parameter is the description of the error; line's and column's
826  number are automatically added by the functions.
827  The \c fmt parameter is a format string that has the same syntax as the \b printf
828  function.
829  Note that it is the user's responsability to provide a format string suitable
830  with the arguments: another string or a character.
831 */
832 void
AddError(const wxString & msg)833 wxJSONReader::AddError(const wxString& msg) {
834     wxString err;
835     err.Printf(_T("Error: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str());
836 
837 #if defined(JSONDEBUG)
838     wxLogTrace(traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
839 #endif
840 
841     if (static_cast<int>(m_errors.size()) < m_maxErrors)  {
842         m_errors.Add(err);
843     } else if (static_cast<int>(m_errors.size()) == m_maxErrors)  {
844         m_errors.Add(_T("ERROR: too many error messages - ignoring further errors"));
845     }
846     // else if (m_errors > m_maxErrors) do nothing, thus ignore the error message
847 }
848 
849 //! \overload AddError(const wxString&)
850 void
AddError(const wxString & fmt,const wxString & str)851 wxJSONReader::AddError(const wxString& fmt, const wxString& str) {
852     wxString s;
853     s.Printf(fmt.c_str(), str.c_str());
854     AddError(s);
855 }
856 
857 //! \overload AddError(const wxString&)
858 void
AddError(const wxString & fmt,wxChar c)859 wxJSONReader::AddError(const wxString& fmt, wxChar c) {
860     wxString s;
861     s.Printf(fmt.c_str(), c);
862     AddError(s);
863 }
864 
865 //! Add a warning message to the warning's array
866 /*!
867  The warning description is as follows:
868  \code
869    Warning: line xxx, col xxx - <warning_description>
870  \endcode
871 
872  Warning messages are generated by the parser when the JSON
873  text that has been read is not well-formed but the
874  error is not fatal and the parser recognizes the text
875  as an extension to the JSON standard (see the parser's ctor
876  for more info about wxJSON extensions).
877 
878  Note that the parser has to be constructed with a flag that
879  indicates if each individual wxJSON extension is on.
880  If the warning message is related to an extension that is not
881  enabled in the parser's \c m_flag data member, this function
882  calls AddError() and the warning message becomes an error
883  message.
884  The \c type parameter is one of the same constants that
885  specify the parser's extensions.
886  If type is ZERO than the function always adds a warning
887 */
888 void
AddWarning(int type,const wxString & msg)889 wxJSONReader::AddWarning(int type, const wxString& msg) {
890     // if 'type' AND 'm_flags' == 1 than the extension is
891     // ON. Otherwise it is OFF anf the function calls AddError()
892     if (type != 0)    {
893         if ((type & m_flags) == 0)  {
894             AddError(msg);
895             return;
896         }
897     }
898 
899     wxString err;
900     err.Printf(_T("Warning: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str());
901 
902 #if defined(JSONDEBUG)
903     wxLogTrace(traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
904 #endif
905     if (static_cast<int>(m_warnings.size()) < m_maxErrors)  {
906         m_warnings.Add(err);
907     } else if (static_cast<int>(m_warnings.size()) == m_maxErrors)  {
908         m_warnings.Add(_T("Error: too many warning messages - ignoring further warnings"));
909     }
910     // else do nothing, thus ignore the warning message
911 }
912 
913 //! Skip all whitespaces.
914 /*!
915  The function reads characters from the input text
916  and returns the first non-whitespace character read or -1
917  if EOF.
918  Note that the function does not rely on the \b isspace function
919  of the C library but checks the space constants: space, TAB and
920  LF.
921 */
922 int
SkipWhiteSpace(wxInputStream & is)923 wxJSONReader::SkipWhiteSpace(wxInputStream& is) {
924     // just read one byte at a time and check for whitespaces
925     int ch;
926     do {
927         ch = ReadChar(is);
928         if (ch < 0)  {
929             break;
930         }
931     }
932     while (ch == ' ' || ch == '\n' || ch == '\t') ; //NOLINT(whitespace/semicolon)
933 #if defined(JSONDEBUG)
934     wxLogTrace(traceMask, _T("(%s) end whitespaces line=%d col=%d"),
935              __PRETTY_FUNCTION__, m_lineNo, m_colNo);
936 #endif
937     return ch;
938 }
939 
940 //! Skip a comment
941 /*!
942  The function is called by DoRead() when a '/' (slash) character
943  is read from the input stream assuming that a C/C++ comment is starting.
944  Returns the first character that follows the comment or
945  -1 on EOF.
946  The function also adds a warning message because comments are not
947  valid JSON text.
948  The function also stores the comment, if any, in the \c m_comment data
949  member: it can be used by the DoRead() function if comments have to be
950  stored in the value they refer to.
951 */
952 int
SkipComment(wxInputStream & is)953 wxJSONReader::SkipComment(wxInputStream& is) {
954     static const wxChar* warn =
955     _T("Comments may be tolerated in JSON text but they are not part of JSON syntax");
956 
957     // if it is a comment, then a warning is added to the array
958     // otherwise it is an error: values cannot start with a '/'
959     // read the char next to the first slash
960     int ch = ReadChar(is);
961     if (ch < 0)  {
962         return -1;
963     }
964 
965 #if defined(JSONDEBUG)
966     wxLogTrace(storeTraceMask, _T("(%s) start comment line=%d col=%d"),
967              __PRETTY_FUNCTION__, m_lineNo, m_colNo);
968 #endif
969 
970     // the temporary UTF-8/ANSI buffer that holds the comment string. This will be
971     // converted to a wxString object using wxString::FromUTF8() or From8BitData()
972     wxMemoryBuffer utf8Buff;
973     unsigned char c;
974 
975     if (ch == '/')  {         // C++ comment, read until end-of-line
976         // C++ comment strings are in UTF-8 format. we store all
977         // UTF-8 code units until the first LF or CR+LF
978         AddWarning(wxJSONREADER_ALLOW_COMMENTS, warn);
979         m_commentLine = m_lineNo;
980         utf8Buff.AppendData("//", 2);
981 
982         while (ch >= 0)  {
983             if (ch == '\n')    {
984                 break;
985             }
986             if (ch == '\r')    {
987                 ch = PeekChar(is);
988                 if (ch == '\n')    {
989                     ch = ReadChar(is);
990                 }
991                 break;
992             } else {
993                 // store the char in the UTF8 temporary buffer
994                 c = (unsigned char) ch;
995                 utf8Buff.AppendByte(c);
996             }
997             ch = ReadChar(is);
998         }
999         // now convert the temporary UTF-8 buffer
1000         m_comment = wxString::FromUTF8((const char*) utf8Buff.GetData(),
1001                         utf8Buff.GetDataLen());
1002 
1003     // check if a C-style comment
1004     } else if (ch == '*')  {     // C-style comment
1005         AddWarning(wxJSONREADER_ALLOW_COMMENTS, warn);
1006         m_commentLine = m_lineNo;
1007         utf8Buff.AppendData("/*", 2);
1008         while (ch >= 0) {
1009             // check the END-COMMENT chars ('*/')
1010             if (ch == '*')    {
1011                 ch = PeekChar(is);
1012                 if (ch == '/')    {
1013                     ch = ReadChar(is);  // read the '/' char
1014                     ch = ReadChar(is);  // read the next char that will be returned
1015                     utf8Buff.AppendData("*/", 2);
1016                     break;
1017                 }
1018             }
1019             // store the char in the UTF8 temporary buffer
1020             c = (unsigned char) ch;
1021             utf8Buff.AppendByte(c);
1022             ch = ReadChar(is);
1023         }
1024         // now convert the temporary buffer in a wxString object
1025         if (m_noUtf8)    {
1026             m_comment = wxString::From8BitData((const char*) utf8Buff.GetData(),
1027                                 utf8Buff.GetDataLen());
1028         } else {
1029             m_comment = wxString::FromUTF8((const char*) utf8Buff.GetData(),
1030                                 utf8Buff.GetDataLen());
1031         }
1032     } else {   // it is not a comment, return the character next the first '/'
1033         AddError(_T("Strange '/' (did you want to insert a comment?)"));
1034         // we read until end-of-line OR end of C-style comment OR EOF
1035         // because a '/' should be a start comment
1036         while (ch >= 0) {
1037             ch = ReadChar(is);
1038             if (ch == '*' && PeekChar(is) == '/')  {
1039                 break;
1040             }
1041             if (ch == '\n')  {
1042                 break;
1043             }
1044         }
1045         // read the next char that will be returned
1046         ch = ReadChar(is);
1047     }
1048 #if defined(JSONDEBUG)
1049     wxLogTrace(traceMask, _T("(%s) end comment line=%d col=%d"),
1050              __PRETTY_FUNCTION__, m_lineNo, m_colNo);
1051     wxLogTrace(storeTraceMask, _T("(%s) end comment line=%d col=%d"),
1052              __PRETTY_FUNCTION__, m_lineNo, m_colNo);
1053     wxLogTrace(storeTraceMask, _T("(%s) comment=%s"),
1054              __PRETTY_FUNCTION__, m_comment.c_str());
1055 #endif
1056     return ch;
1057 }
1058 
1059 //! Read a string value
1060 /*!
1061  The function reads a string value from input stream and it is
1062  called by the \c DoRead() function when it enconters the
1063  double quote characters.
1064  The function read all bytes up to the next double quotes
1065  (unless it is escaped) and stores them in a temporary UTF-8
1066  memory buffer.
1067  Also, the function processes the escaped characters defined
1068  in the JSON syntax.
1069 
1070  Next, the function tries to convert the UTF-8 buffer to a
1071  \b wxString object using the \b wxString::FromUTF8 function.
1072  Depending on the build mode, we can have the following:
1073  \li in Unicode the function always succeeds, provided that the
1074     buffer contains valid UTF-8 code units.
1075 
1076  \li in ANSI builds the conversion may fail because of the presence of
1077     unrepresentable characters in the current locale. In this case,
1078     the default behaviour is to perform a char-by-char conversion; every
1079     char that cannot be represented in the current locale is stored as
1080     \e unicode \e escaped \e sequence
1081 
1082  \li in ANSI builds, if the reader is constructed with the wxJSONREADER_NOUTF8_STREAM
1083      then no conversion takes place and the UTF-8 temporary buffer is simply
1084      \b copied to the \b wxString object
1085 
1086  The string is, finally, stored in the provided wxJSONValue argument
1087  provided that it is empty or it contains a string value.
1088  This is because the parser class recognizes multi-line strings
1089  like the following one:
1090  \code
1091    [
1092       "This is a very long string value which is splitted into more"
1093       "than one line because it is more human readable"
1094    ]
1095  \endcode
1096  Because of the lack of the value separator (,) the parser
1097  assumes that the string was splitted into several double-quoted
1098  strings.
1099  If the value does not contain a string then an error is
1100  reported.
1101  Splitted strings cause the parser to report a warning.
1102 */
1103 int
ReadString(wxInputStream & is,wxJSONValue & val)1104 wxJSONReader::ReadString(wxInputStream& is, wxJSONValue& val) {
1105     // the char last read is the opening qoutes (")
1106 
1107     wxMemoryBuffer utf8Buff;
1108     char ues[8];        // stores a Unicode Escaped Esquence: \uXXXX
1109 
1110     int ch = 0;
1111     while (ch >= 0) {
1112         ch = ReadChar(is);
1113         unsigned char c = (unsigned char) ch;
1114         if (ch == '\\')  {    // an escape sequence
1115             ch = ReadChar(is);
1116             switch (ch)  {
1117                 case -1 :        // EOF
1118                     break;
1119                 case 't' :
1120                     utf8Buff.AppendByte('\t');
1121                     break;
1122                 case 'n' :
1123                     utf8Buff.AppendByte('\n');
1124                     break;
1125                 case 'b' :
1126                     utf8Buff.AppendByte('\b');
1127                     break;
1128                 case 'r' :
1129                     utf8Buff.AppendByte('\r');
1130                     break;
1131                 case '\"' :
1132                     utf8Buff.AppendByte('\"');
1133                     break;
1134                 case '\\' :
1135                     utf8Buff.AppendByte('\\');
1136                     break;
1137                 case '/' :
1138                     utf8Buff.AppendByte('/');
1139                     break;
1140                 case 'f' :
1141                     utf8Buff.AppendByte('\f');
1142                     break;
1143                 case 'u' :
1144                     ch = ReadUES(is, ues);
1145                     if (ch < 0) {        // if EOF, returns
1146                         return ch;
1147                     }
1148                     // append the escaped character to the UTF8 buffer
1149                     AppendUES(utf8Buff, ues);
1150                     // many thanks to Bryan Ashby who discovered this bug
1151                     continue;
1152                     // break;
1153                 default :
1154                     AddError(_T("Unknow escaped character \'\\%c\'"), ch);
1155             }
1156         } else {
1157             // we have read a non-escaped character so we have to append it to
1158             // the temporary UTF-8 buffer until the next quote char
1159             if (ch == '\"')    {
1160                 break;
1161             }
1162             utf8Buff.AppendByte(c);
1163         }
1164     }
1165 
1166     // if UTF-8 conversion is disabled (ANSI builds only) we just copy the
1167     // bit data to a wxString object
1168     wxString s;
1169     if (m_noUtf8)    {
1170         s = wxString::From8BitData((const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1171     } else {
1172         // perform UTF-8 conversion
1173         // first we check that the UTF-8 buffer is correct, i.e. it contains valid
1174         // UTF-8 code points.
1175         // this works in both ANSI and Unicode builds.
1176         size_t convLen = wxConvUTF8.ToWChar(0,        // wchar_t destination
1177                         0,                            // size_t  destLenght
1178             (const char*) utf8Buff.GetData(),        // char_t  source
1179                 utf8Buff.GetDataLen());                // size_t  sourceLenght
1180 
1181         if (convLen == wxCONV_FAILED)    {
1182             AddError(_T("String value: the UTF-8 stream is invalid"));
1183             s.append(_T("<UTF-8 stream not valid>"));
1184         } else {
1185 #if defined(wxJSON_USE_UNICODE)
1186             // in Unicode just convert to wxString
1187             s = wxString::FromUTF8((const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1188 #else
1189             // in ANSI, the conversion may fail and an empty string is returned
1190             // in this case, the reader do a char-by-char conversion storing
1191               // unicode escaped sequences of unrepresentable characters
1192             s = wxString::FromUTF8((const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1193             if (s.IsEmpty())    {
1194                 int r = ConvertCharByChar(s, utf8Buff);    // return number of escaped sequences
1195                 if (r > 0)    {
1196                     AddWarning(0, _T("The string value contains unrepresentable Unicode characters"));
1197                 }
1198             }
1199 #endif
1200         }
1201      }
1202 #if defined(JSONDEBUG)
1203     wxLogTrace(traceMask, _T("(%s) line=%d col=%d"),
1204              __PRETTY_FUNCTION__, m_lineNo, m_colNo);
1205     wxLogTrace(traceMask, _T("(%s) string read=%s"),
1206              __PRETTY_FUNCTION__, s.c_str());
1207     wxLogTrace(traceMask, _T("(%s) value=%s"),
1208              __PRETTY_FUNCTION__, val.AsString().c_str());
1209 #endif
1210 
1211     // now assign the string to the JSON-value 'value'
1212     // must check that:
1213     //   'value'  is empty
1214     //   'value'  is a string; concatenate it but emit warning
1215     if (!val.IsValid())   {
1216 #if defined(JSONDEBUG)
1217         wxLogTrace(traceMask, _T("(%s) assigning the string to value"), __PRETTY_FUNCTION__);
1218 #endif
1219         val = s;
1220     } else if (val.IsString())  {
1221         AddWarning(wxJSONREADER_MULTISTRING,
1222             _T("Multiline strings are not allowed by JSON syntax"));
1223 #if defined(JSONDEBUG)
1224         wxLogTrace(traceMask, _T("(%s) concatenate the string to value"), __PRETTY_FUNCTION__);
1225 #endif
1226         val.Cat(s);
1227     } else {
1228         AddError(_T("String value \'%s\' cannot follow another value"), s);
1229     }
1230 
1231     // store the input text's line number when the string was stored in 'val'
1232     val.SetLineNo(m_lineNo);
1233 
1234     // read the next char after the closing quotes and returns it
1235     if (ch >= 0)  {
1236         ch = ReadChar(is);
1237     }
1238     return ch;
1239 }
1240 
1241 //! Reads a token string
1242 /*!
1243  This function is called by the ReadValue() when the
1244  first character encontered is not a special char
1245  and it is not a double-quote.
1246  The only possible type is a literal or a number which
1247  all lies in the US-ASCII charset so their UTF-8 encodeing
1248  is the same as US-ASCII.
1249  The function simply reads one byte at a time from the stream
1250  and appends them to a \b wxString object.
1251  Returns the next character read.
1252 
1253  A token cannot include \e unicode \e escaped \e sequences
1254  so this function does not try to interpret such sequences.
1255 
1256  @param is    the input stream
1257  @param ch    the character read by DoRead
1258  @param s    the string object that contains the token read
1259  @return -1 in case of errors or EOF
1260 */
1261 int
ReadToken(wxInputStream & is,int ch,wxString & s)1262 wxJSONReader::ReadToken(wxInputStream& is, int ch, wxString& s) {
1263     int nextCh = ch;
1264     while (nextCh >= 0) {
1265         switch (nextCh) {
1266             case ' ' :
1267             case ',' :
1268             case ':' :
1269             case '[' :
1270             case ']' :
1271             case '{' :
1272             case '}' :
1273             case '\t' :
1274             case '\n' :
1275             case '\r' :
1276             case '\b' :
1277 #if defined(JSONDEBUG)
1278                 wxLogTrace(traceMask, _T("(%s) line=%d col=%d"),
1279                      __PRETTY_FUNCTION__, m_lineNo, m_colNo);
1280                 wxLogTrace(traceMask, _T("(%s) token read=%s"),
1281                      __PRETTY_FUNCTION__, s.c_str());
1282 #endif
1283                 return nextCh;
1284                 break;
1285             default :
1286                 s.Append((unsigned char) nextCh, 1);
1287                 break;
1288         }
1289         // read the next character
1290         nextCh = ReadChar(is);
1291     }
1292 #if defined(JSONDEBUG)
1293     wxLogTrace(traceMask, _T("(%s) EOF on line=%d col=%d"),
1294          __PRETTY_FUNCTION__, m_lineNo, m_colNo);
1295     wxLogTrace(traceMask, _T("(%s) EOF - token read=%s"),
1296              __PRETTY_FUNCTION__, s.c_str());
1297 #endif
1298     return nextCh;
1299 }
1300 
1301 //! Read a value from input stream
1302 /*!
1303  The function is called by DoRead() when it enconters a char that is
1304  not a special char nor a double-quote.
1305  It assumes that the string is a numeric value or a literal
1306  boolean value and stores it in the wxJSONValue object \c val.
1307 
1308  The function also checks that \c val is of type wxJSONTYPE_INVALID otherwise
1309  an error is reported becasue a value cannot follow another value:
1310  maybe a (,) or (:) is missing.
1311 
1312  If the literal starts with a digit, a plus or minus sign, the function
1313  tries to interpret it as a number. The following are tried by the function,
1314  in this order:
1315 
1316  \li if the literal starts with a digit: signed integer, then unsigned integer
1317         and finally double conversion is tried
1318  \li if the literal starts with a minus sign: signed integer, then  double
1319         conversion is tried
1320  \li if the literal starts with plus sign: unsigned integer
1321         then double conversion is tried
1322 
1323  Returns the next character or -1 on EOF.
1324 */
1325 int
ReadValue(wxInputStream & is,int ch,wxJSONValue & val)1326 wxJSONReader::ReadValue(wxInputStream& is, int ch, wxJSONValue& val) {
1327     wxString s;
1328     int nextCh = ReadToken(is, ch, s);
1329 #if defined(JSONDEBUG)
1330     wxLogTrace(traceMask, _T("(%s) value=%s"),
1331              __PRETTY_FUNCTION__, val.AsString().c_str());
1332 #endif
1333 
1334     if (val.IsValid())  {
1335         AddError(_T("Value \'%s\' cannot follow a value: \',\' or \':\' missing?"), s);
1336         return nextCh;
1337     }
1338 
1339     // variables used for converting numeric values
1340     bool r;  double d;
1341 #if defined(wxJSON_64BIT_INT)
1342     wxInt64  i64;
1343     wxUint64 ui64;
1344 #else
1345     unsigned long int ul; long int l;
1346 #endif
1347 
1348     // first try the literal strings lowercase and nocase
1349     if (s == _T("null")) {
1350         val.SetType(wxJSONTYPE_NULL);
1351 #if defined(JSONDEBUG)
1352         wxLogTrace(traceMask, _T("(%s) value = NULL"),  __PRETTY_FUNCTION__);
1353 #endif
1354         return nextCh;
1355     } else if (s.CmpNoCase(_T("null")) == 0) {
1356 #if defined(JSONDEBUG)
1357         wxLogTrace(traceMask, _T("(%s) value = NULL"),  __PRETTY_FUNCTION__);
1358 #endif
1359         AddWarning(wxJSONREADER_CASE, _T("the \'null\' literal must be lowercase"));
1360         val.SetType(wxJSONTYPE_NULL);
1361         return nextCh;
1362     } else if (s == _T("true")) {
1363 #if defined(JSONDEBUG)
1364         wxLogTrace(traceMask, _T("(%s) value = TRUE"),  __PRETTY_FUNCTION__);
1365 #endif
1366         val = true;
1367         return nextCh;
1368     } else if (s.CmpNoCase(_T("true")) == 0) {
1369 #if defined(JSONDEBUG)
1370         wxLogTrace(traceMask, _T("(%s) value = TRUE"),  __PRETTY_FUNCTION__);
1371 #endif
1372         AddWarning(wxJSONREADER_CASE, _T("the \'true\' literal must be lowercase"));
1373         val = true;
1374         return nextCh;
1375     } else if (s == _T("false")) {
1376 #if defined(JSONDEBUG)
1377         wxLogTrace(traceMask, _T("(%s) value = FALSE"),  __PRETTY_FUNCTION__);
1378 #endif
1379         val = false;
1380         return nextCh;
1381     } else if (s.CmpNoCase(_T("false")) == 0) {
1382 #if defined(JSONDEBUG)
1383         wxLogTrace(traceMask, _T("(%s) value = FALSE"),  __PRETTY_FUNCTION__);
1384 #endif
1385         AddWarning(wxJSONREADER_CASE, _T("the \'false\' literal must be lowercase"));
1386         val = false;
1387         return nextCh;
1388     }
1389 
1390 
1391     // try to convert to a number if the token starts with a digit, a plus or a minus
1392     // sign. The function first states what type of conversion are tested:
1393     //    1. first signed integer (not if 'ch' == '+')
1394     //    2. unsigned integer (not if 'ch' == '-')
1395     //    3. finally double
1396     bool tSigned = true, tUnsigned = true, tDouble = true;
1397     switch (ch)  {
1398         case '0' :
1399         case '1' :
1400         case '2' :
1401         case '3' :
1402         case '4' :
1403         case '5' :
1404         case '6' :
1405         case '7' :
1406         case '8' :
1407         case '9' :
1408             // first try a signed integer, then a unsigned integer, then a double
1409             break;
1410 
1411         case '+' :
1412             // the plus sign forces a unsigned integer
1413             tSigned = false;
1414             break;
1415 
1416         case '-' :
1417             // try signed and double
1418             tUnsigned = false;
1419             break;
1420         default :
1421             AddError(_T("Literal \'%s\' is incorrect (did you forget quotes?)"), s);
1422             return nextCh;
1423     }
1424 
1425     if (tSigned)    {
1426     #if defined(wxJSON_64BIT_INT)
1427         r = Strtoll(s, &i64);
1428 #if defined(JSONDEBUG)
1429         wxLogTrace(traceMask, _T("(%s) convert to wxInt64 result=%d"),
1430                   __PRETTY_FUNCTION__, r);
1431 #endif
1432         if (r)  {
1433             // store the value
1434             val = i64;
1435             return nextCh;
1436         }
1437     #else
1438         r = s.ToLong(&l);
1439 #if defined(JSONDEBUG)
1440         wxLogTrace(traceMask, _T("(%s) convert to int result=%d"),
1441                  __PRETTY_FUNCTION__, r);
1442 #endif
1443         if (r)  {
1444             // store the value
1445             val = static_cast<int>(l);
1446             return nextCh;
1447         }
1448     #endif
1449     }
1450 
1451     if (tUnsigned)    {
1452     #if defined(wxJSON_64BIT_INT)
1453         r = Strtoull(s, &ui64);
1454 #if defined(JSONDEBUG)
1455         wxLogTrace(traceMask, _T("(%s) convert to wxUint64 result=%d"),
1456                               __PRETTY_FUNCTION__, r);
1457 #endif
1458         if (r)  {
1459             // store the value
1460             val = ui64;
1461             return nextCh;
1462         }
1463     #else
1464         r = s.ToULong(&ul);
1465 #if defined(JSONDEBUG)
1466         wxLogTrace(traceMask, _T("(%s) convert to int result=%d"),
1467                          __PRETTY_FUNCTION__, r);
1468 #endif
1469         if (r)  {
1470             // store the value
1471             val = (unsigned int) ul;
1472             return nextCh;
1473         }
1474     #endif
1475     }
1476 
1477     if (tDouble)    {
1478         r = s.ToDouble(&d);
1479 #if defined(JSONDEBUG)
1480         wxLogTrace(traceMask, _T("(%s) convert to double result=%d"),
1481                  __PRETTY_FUNCTION__, r);
1482 #endif
1483         if (r)  {
1484             // store the value
1485             val = d;
1486             return nextCh;
1487         }
1488     }
1489 
1490 
1491     // the value is not syntactically correct
1492     AddError(_T("Literal \'%s\' is incorrect (did you forget quotes?)"), s);
1493     return nextCh;
1494   return nextCh;
1495 }
1496 
1497 
1498 //! Read a 4-hex-digit unicode character.
1499 /*!
1500  The function is called by ReadString() when the \b \\u sequence is
1501  encontered; the sequence introduces a control character in the form:
1502  \code
1503      \uXXXX
1504  \endcode
1505  where XXXX is a four-digit hex code..
1506  The function reads four chars from the input UTF8 stream by calling ReadChar()
1507  four times: if EOF is encontered before reading four chars, -1 is
1508  also returned and no sequence interpretation is performed.
1509  The function stores the 4 hexadecimal digits in the \c uesBuffer parameter.
1510 
1511  Returns the character after the hex sequence or -1 if EOF.
1512 
1513  \b NOTICE: although the JSON syntax states that only control characters
1514  are represented in this way, the wxJSON library reads and recognizes all
1515  unicode characters in the BMP.
1516 */
1517 int
ReadUES(wxInputStream & is,char * uesBuffer)1518 wxJSONReader::ReadUES(wxInputStream& is, char* uesBuffer) {
1519     int ch;
1520     for (int i = 0; i < 4; i++)  {
1521         ch = ReadChar(is);
1522         if (ch < 0)  {
1523             return ch;
1524         }
1525         uesBuffer[i] = (unsigned char) ch;
1526     }
1527     uesBuffer[4] = 0;    // makes a ASCIIZ string
1528 
1529     return 0;
1530 }
1531 
1532 
1533 //! The function appends a Unice Escaped Sequence to the temporary UTF8 buffer
1534 /*!
1535  This function is called by \c ReadString() when a \e unicode \e escaped
1536  \e sequence is read from the input text as for example:
1537 
1538  \code
1539   \u0001
1540  \endcode
1541 
1542  which represents a control character.
1543  The \c uesBuffer parameter contains the 4 hexadecimal digits that are
1544  read from \c ReadUES.
1545 
1546  The function tries to convert the 4 hex digits in a \b wchar_t character
1547  which is appended to the memory buffer \c utf8Buff after converting it
1548  to UTF-8.
1549 
1550  If the conversion from hexadecimal fails, the function does not
1551  store the character in the UTF-8 buffer and an error is reported.
1552  The function is the same in ANSI and Unicode.
1553  Returns -1 if the buffer does not contain valid hex digits.
1554  sequence. On success returns ZERO.
1555 
1556  @param utf8Buff    the UTF-8 buffer to which the control char is written
1557  @param uesBuffer    the four-hex-digits read from the input text
1558  @return ZERO on success, -1 if the four-hex-digit buffer cannot be converted
1559 */
1560 int
AppendUES(wxMemoryBuffer & utf8Buff,const char * uesBuffer)1561 wxJSONReader::AppendUES(wxMemoryBuffer& utf8Buff, const char* uesBuffer) {
1562     unsigned long l;
1563     int r = sscanf(uesBuffer, "%lx", &l);    // r is the assigned items
1564     if (r != 1)  {
1565         AddError(_T("Invalid Unicode Escaped Sequence"));
1566         return -1;
1567     }
1568 #if defined(JSONDEBUG)
1569     wxLogTrace(traceMask, _T("(%s) unicode sequence=%s code=%ld"),
1570               __PRETTY_FUNCTION__, uesBuffer, l);
1571 #endif
1572 
1573     wchar_t ch = (wchar_t) l;
1574     char buffer[16];
1575     size_t len = wxConvUTF8.FromWChar(buffer, 10, &ch, 1);
1576 
1577     // seems that the wxMBConv classes always appends a NULL byte to
1578     // the converted buffer
1579     if (len > 1)    {
1580         len = len - 1;
1581     }
1582     utf8Buff.AppendData(buffer, len);
1583 
1584     // sould never fail
1585     wxASSERT(len != wxCONV_FAILED);
1586     return 0;
1587 }
1588 
1589 //! Store the comment string in the value it refers to.
1590 /*!
 The function searches a suitable value object for storing the
 comment line that was read by the parser and temporarily
 stored in \c m_comment.
1594  The function searches the three values pointed to by:
1595  \li \c m_next
1596  \li \c m_current
1597  \li \c m_lastStored
1598 
1599  The value that the comment refers to is:
1600 
1601  \li if the comment is on the same line as one of the values, the comment
1602     refer to that value and it is stored as \b inline.
1603  \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_BEFORE, the comment lines
1604     are stored in the value pointed to by \c m_next
 \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_AFTER, the comment lines
    are stored in the value pointed to by \c m_current or \c m_lastStored
1607 
 Note that the comment line is only stored if the wxJSONREADER_STORE_COMMENTS
 flag was used when the parser object was constructed; otherwise, the
 function does nothing and immediately returns.
1611  Also note that if the comment line has to be stored but the
1612  function cannot find a suitable value to add the comment line to,
1613  an error is reported (note: not a warning but an error).
1614 */
1615 void
StoreComment(const wxJSONValue * parent)1616 wxJSONReader::StoreComment(const wxJSONValue* parent) {
1617 #if defined(JSONDEBUG)
1618     wxLogTrace(storeTraceMask, _T("(%s) m_comment=%s"),  __PRETTY_FUNCTION__, m_comment.c_str());
1619     wxLogTrace(storeTraceMask, _T("(%s) m_flags=%d m_commentLine=%d"),
1620               __PRETTY_FUNCTION__, m_flags, m_commentLine);
1621     wxLogTrace(storeTraceMask, _T("(%s) m_current=%p"), __PRETTY_FUNCTION__, m_current);
1622     wxLogTrace(storeTraceMask, _T("(%s) m_next=%p"), __PRETTY_FUNCTION__, m_next);
1623     wxLogTrace(storeTraceMask, _T("(%s) m_lastStored=%p"), __PRETTY_FUNCTION__, m_lastStored);
1624 #endif
1625 
1626     // first check if the 'store comment' bit is on
1627     if ((m_flags & wxJSONREADER_STORE_COMMENTS) == 0)  {
1628         m_comment.clear();
1629         return;
1630     }
1631 
1632     // check if the comment is on the same line of one of the
1633     // 'current', 'next' or 'lastStored' value
1634     if (m_current != 0)  {
1635 #if defined(JSONDEBUG)
1636         wxLogTrace(storeTraceMask, _T("(%s) m_current->lineNo=%d"),
1637              __PRETTY_FUNCTION__, m_current->GetLineNo());
1638 #endif
1639         if (m_current->GetLineNo() == m_commentLine) {
1640 #if defined(JSONDEBUG)
1641             wxLogTrace(storeTraceMask, _T("(%s) comment added to \'m_current\' INLINE"),
1642              __PRETTY_FUNCTION__);
1643 #endif
1644             m_current->AddComment(m_comment, wxJSONVALUE_COMMENT_INLINE);
1645             m_comment.clear();
1646             return;
1647         }
1648     }
1649     if (m_next != 0)  {
1650 #if defined(JSONDEBUG)
1651         wxLogTrace(storeTraceMask, _T("(%s) m_next->lineNo=%d"),
1652              __PRETTY_FUNCTION__, m_next->GetLineNo());
1653 #endif
1654         if (m_next->GetLineNo() == m_commentLine) {
1655 #if defined(JSONDEBUG)
1656             wxLogTrace(storeTraceMask, _T("(%s) comment added to \'m_next\' INLINE"),
1657                  __PRETTY_FUNCTION__);
1658 #endif
1659             m_next->AddComment(m_comment, wxJSONVALUE_COMMENT_INLINE);
1660             m_comment.clear();
1661             return;
1662         }
1663     }
1664     if (m_lastStored != 0)  {
1665 #if defined(JSONDEBUG)
1666         wxLogTrace(storeTraceMask, _T("(%s) m_lastStored->lineNo=%d"),
1667              __PRETTY_FUNCTION__, m_lastStored->GetLineNo());
1668 #endif
1669         if (m_lastStored->GetLineNo() == m_commentLine) {
1670 #if defined(JSONDEBUG)
1671             wxLogTrace(storeTraceMask, _T("(%s) comment added to \'m_lastStored\' INLINE"),
1672                  __PRETTY_FUNCTION__);
1673 #endif
1674             m_lastStored->AddComment(m_comment, wxJSONVALUE_COMMENT_INLINE);
1675             m_comment.clear();
1676             return;
1677         }
1678     }
1679 
1680     // if comment is BEFORE, store the comment in the 'm_next'
1681     // or 'm_current' value
1682     // if comment is AFTER, store the comment in the 'm_lastStored'
1683     // or 'm_current' value
1684 
1685     if (m_flags & wxJSONREADER_COMMENTS_AFTER)  {  // comment AFTER
1686         if (m_current)  {
1687             if (m_current == parent || !m_current->IsValid()) {
1688                 AddError(_T("Cannot find a value for storing the comment (flag AFTER)"));
1689             } else {
1690 #if defined(JSONDEBUG)
1691                 wxLogTrace(storeTraceMask, _T("(%s) comment added to m_current (AFTER)"),
1692                      __PRETTY_FUNCTION__);
1693 #endif
1694                 m_current->AddComment(m_comment, wxJSONVALUE_COMMENT_AFTER);
1695             }
1696         } else if (m_lastStored)  {
1697 #if defined(JSONDEBUG)
1698             wxLogTrace(storeTraceMask, _T("(%s) comment added to m_lastStored (AFTER)"),
1699                  __PRETTY_FUNCTION__);
1700 #endif
1701             m_lastStored->AddComment(m_comment, wxJSONVALUE_COMMENT_AFTER);
1702         } else {
1703 #if defined(JSONDEBUG)
1704             wxLogTrace(storeTraceMask,
1705                 _T("(%s) cannot find a value for storing the AFTER comment"), __PRETTY_FUNCTION__);
1706 #endif
1707             AddError(_T("Cannot find a value for storing the comment (flag AFTER)"));
1708         }
1709     } else {       // comment BEFORE can only be added to the 'next' value
1710         if (m_next)  {
1711 #if defined(JSONDEBUG)
1712             wxLogTrace(storeTraceMask, _T("(%s) comment added to m_next (BEFORE)"),
1713                  __PRETTY_FUNCTION__);
1714 #endif
1715             m_next->AddComment(m_comment, wxJSONVALUE_COMMENT_BEFORE);
1716         } else {
1717             // cannot find a value for storing the comment
1718             AddError(_T("Cannot find a value for storing the comment (flag BEFORE)"));
1719         }
1720     }
1721     m_comment.clear();
1722 }
1723 
1724 
1725 //! Return the number of bytes that make a character in stream input
1726 /*!
1727  This function returns the number of bytes that represent a unicode
1728  code point in various encoding.
1729  For example, if the input stream is UTF-32 the function returns 4.
1730  Because the only recognized format for streams is UTF-8 the function
1731  just calls UTF8NumBytes() and returns.
1732  The function is, actually, not used at all.
1733 
1734 */
1735 int
NumBytes(char ch)1736 wxJSONReader::NumBytes(char ch) {
1737     int n = UTF8NumBytes(ch);
1738     return n;
1739 }
1740 
1741 //! Compute the number of bytes that makes a UTF-8 encoded wide character.
1742 /*!
1743  The function counts the number of '1' bit in the character \c ch and
1744  returns it.
1745  The UTF-8 encoding specifies the number of bytes needed by a wide character
1746  by coding it in the first byte. See below.
1747 
1748  Note that if the character does not contain a valid UTF-8 encoding
1749  the function returns -1.
1750 
1751 \code
1752    UCS-4 range (hex.)    UTF-8 octet sequence (binary)
1753    -------------------   -----------------------------
1754    0000 0000-0000 007F   0xxxxxxx
1755    0000 0080-0000 07FF   110xxxxx 10xxxxxx
1756    0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1757    0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1758    0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1759    0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
1760 \endcode
1761 */
1762 int
UTF8NumBytes(char ch)1763 wxJSONReader::UTF8NumBytes(char ch) {
1764     int num = 0;    // the counter of '1' bits
1765     for (int i = 0; i < 8; i++)  {
1766         if ((ch & 0x80) == 0)  {
1767             break;
1768         }
1769         ++num;
1770         ch = ch << 1;
1771     }
1772 
1773     // note that if the char contains more than six '1' bits it is not
1774     // a valid UTF-8 encoded character
1775     if (num > 6)  {
1776         num = -1;
1777     } else if (num == 0)  {
1778         num = 1;
1779     }
1780     return num;
1781 }
1782 
1783 //! Convert a UTF-8 memory buffer one char at a time
1784 /*!
1785  This function is used in ANSI mode when input from a stream is in UTF-8
1786  format and the UTF-8 buffer read cannot be converted to the locale
1787  wxString object.
1788  The function performs a char-by-char conversion of the buffer and appends
1789  every representable character to the string \c s.
1790  Characters that cannot be represented are stored as \e unicode \e escaped
1791  \e sequences in the form:
1792  \code
1793    \uXXXX
1794  \endcode
1795  where XXXX is a for-hex-digits Unicode code point.
1796  The function returns the number of characters that cannot be represented
1797  in the current locale.
1798 */
1799 int
ConvertCharByChar(wxString & s,const wxMemoryBuffer & utf8Buffer)1800 wxJSONReader::ConvertCharByChar(wxString& s, const wxMemoryBuffer& utf8Buffer) {
1801     size_t len  = utf8Buffer.GetDataLen();
1802     char*  buff = reinterpret_cast<char*>(utf8Buffer.GetData());
1803     char* buffEnd = buff + len;
1804 
1805     int result = 0;
1806     char temp[16];    // the UTF-8 code-point
1807 
1808     while (buff < buffEnd)    {
1809         temp[0] = *buff;    // the first UTF-8 code-unit
1810         // compute the number of code-untis that make one UTF-8 code-point
1811         int numBytes = NumBytes(*buff);
1812         ++buff;
1813         for (int i = 1; i < numBytes; i++)    {
1814             if (buff >= buffEnd)    {
1815                 break;
1816             }
1817             temp[i] = *buff;    // the first UTF-8 code-unit
1818             ++buff;
1819         }
1820         //if (buff >= buffEnd)    {
1821         //    break;
1822         //}
1823         // now convert 'temp' to a wide-character
1824         wchar_t dst[10];
1825         size_t outLength = wxConvUTF8.ToWChar(dst, 10, temp, numBytes);
1826 
1827         // now convert the wide char to a locale dependent character
1828         // len = wxConvLocal.FromWChar(temp, 16, dst, outLength);
1829         // len = wxConviso8859_1.FromWChar(temp, 16, dst, outLength);
1830         len = wxConvLibc.FromWChar(temp, 16, dst, outLength);
1831         if (len == wxCONV_FAILED)    {
1832             ++result;
1833             wxString t;
1834             t.Printf(_T("\\u%04X"), static_cast<int>(dst[0]));
1835             s.Append(t);
1836         } else {
1837             s.Append(temp[0], 1);
1838         }
1839     }        // end while
1840     return result;
1841 }
1842 
1843 //! Read a memory buffer type
1844 /*!
1845  This function is called by DoRead() when the single-quote character is
1846  encontered which starts a \e memory \e buffer type.
1847  This type is a \b wxJSON extension so the function emits a warning
1848  when such a type encontered.
1849  If the reader is constructed without the \c wxJSONREADER_MEMORYBUFF flag
1850  then the warning becomes an error.
1851  To know more about this JSON syntax extension read \ref wxjson_tutorial_memorybuff
1852 
1853  @param is the input stream
1854  @param val the JSON value that will hold the memory buffer value
1855  @return the last char read or -1 in case of EOF
1856 */
1857 
1858 //union byte {
1859 //    unsigned char c[2];
1860 //    short int b;
1861 //};
1862 
1863 int
ReadMemoryBuff(wxInputStream & is,wxJSONValue & val)1864 wxJSONReader::ReadMemoryBuff(wxInputStream& is, wxJSONValue& val) {
1865     static const wxChar* membuffError = _T("the \'memory buffer\' type contains %d invalid digits");
1866 
1867     AddWarning(wxJSONREADER_MEMORYBUFF, _T("the \'memory buffer\' type is not valid JSON text"));
1868 
1869     wxMemoryBuffer buff;
1870     int ch = 0; int errors = 0;
1871     unsigned char byte = 0;
1872     while (ch >= 0) {
1873         ch = ReadChar(is);
1874         if (ch < 0)  {
1875             break;
1876         }
1877         if (ch == '\'')  {
1878             break;
1879         }
1880         // the conversion is done two chars at a time
1881         unsigned char c1 = (unsigned char) ch;
1882         ch = ReadChar(is);
1883         if (ch < 0)  {
1884             break;
1885         }
1886         unsigned char c2 = (unsigned char) ch;
1887         c1 -= '0';
1888         c2 -= '0';
1889         if (c1 > 9)  {
1890             c1 -= 7;
1891         }
1892         if (c2 > 9)  {
1893             c2 -= 7;
1894         }
1895         if (c1 > 15)  {
1896             ++errors;
1897         } else if (c2 > 15)  {
1898             ++errors;
1899         } else {
1900             byte = (c1 * 16) + c2;
1901             buff.AppendByte(byte);
1902         }
1903     }   // end while
1904 
1905     if (errors > 0)  {
1906         wxString err;
1907         err.Printf(membuffError, errors);
1908         AddError(err);
1909     }
1910 
1911 
1912     // now assign the memory buffer object to the JSON-value 'value'
1913     // must check that:
1914     //   'value'  is invalid OR
1915     //   'value'  is a memory buffer; concatenate it
1916     if (!val.IsValid())   {
1917 #if defined(JSONDEBUG)
1918         wxLogTrace(traceMask, _T("(%s) assigning the memory buffer to value"), __PRETTY_FUNCTION__);
1919 #endif
1920         val = buff;
1921     } else if (val.IsMemoryBuff())  {
1922 #if defined(JSONDEBUG)
1923         wxLogTrace(traceMask, _T("(%s) concatenate memory buffer to value"), __PRETTY_FUNCTION__);
1924 #endif
1925         val.Cat(buff);
1926     } else {
1927         AddError(_T("Memory buffer value cannot follow another value"));
1928     }
1929 
1930     // store the input text's line number when the string was stored in 'val'
1931     val.SetLineNo(m_lineNo);
1932 
1933     // read the next char after the closing quotes and returns it
1934     if (ch >= 0)  {
1935         ch = ReadChar(is);
1936     }
1937     return ch;
1938 }
1939 
1940 
1941 
1942 
1943 #if defined(wxJSON_64BIT_INT)
1944 //! Converts a decimal string to a 64-bit signed integer
1945 /*!
1946  This function implements a simple variant
1947  of the \b strtoll C-library function.
1948  I needed this implementation because the wxString::To(U)LongLong
1949  function does not work on my system:
1950 
1951   \li GNU/Linux Fedora Core 6
1952   \li GCC version 4.1.1
1953   \li libc.so.6
1954 
1955  The wxWidgets library (actually I have installed version 2.8.7)
1956  relies on \b strtoll in order to do the conversion from a string
1957  to a long long integer but, in fact, it does not work because
1958  the 'wxHAS_STRTOLL' macro is not defined on my system.
1959  The problem only affects the Unicode builds while it seems
1960  that the wxString::To(U)LongLong function works in ANSI builds.
1961 
1962  Note that this implementation is not a complete substitute of the
1963  strtoll function because it only converts decimal strings (only base
1964  10 is implemented).
1965 
1966  @param str the string that contains the decimal literal
1967  @param i64 the pointer to long long which holds the converted value
1968 
1969  @return TRUE if the conversion succeeds
1970 */
1971 bool
Strtoll(const wxString & str,wxInt64 * i64)1972 wxJSONReader::Strtoll(const wxString& str, wxInt64* i64) {
1973     wxChar sign = ' ';
1974     wxUint64 ui64;
1975     bool r = DoStrto_ll(str, &ui64, &sign);
1976 
1977     // check overflow for signed long long
1978     switch (sign)  {
1979         case '-' :
1980             if (ui64 > (wxUint64) LLONG_MAX + 1)  {
1981                 r = false;
1982             } else {
1983                 *i64 = (wxInt64) (ui64 * -1);
1984             }
1985             break;
1986 
1987         // case '+' :
1988         default :
1989             if (ui64 > LLONG_MAX)  {
1990                 r = false;
1991             } else {
1992                 *i64 = (wxInt64) ui64;
1993             }
1994             break;
1995     }
1996     return r;
1997 }
1998 
1999 
2000 //! Converts a decimal string to a 64-bit unsigned integer.
2001 /*!
2002  Similar to \c Strtoll but for unsigned integers
2003 */
2004 bool
Strtoull(const wxString & str,wxUint64 * ui64)2005 wxJSONReader::Strtoull(const wxString& str, wxUint64* ui64) {
2006     wxChar sign = ' ';
2007     bool r = DoStrto_ll(str, ui64, &sign);
2008     if (sign == '-')  {
2009         r = false;
2010     }
2011     return r;
2012 }
2013 
2014 //! Perform the actual conversion from a string to a 64-bit integer
2015 /*!
2016  This function is called internally by the \c Strtoll and \c Strtoull functions
2017  and it does the actual conversion.
2018  The function is also able to check numeric overflow.
2019 
2020  @param str the string that has to be converted
2021  @param ui64 the pointer to a unsigned long long that holds the converted value
2022  @param sign the pointer to a wxChar character that will get the sign of the literal string, if any
2023  @return TRUE if the conversion succeeds
2024 */
2025 bool
DoStrto_ll(const wxString & str,wxUint64 * ui64,wxChar * sign)2026 wxJSONReader::DoStrto_ll(const wxString& str, wxUint64* ui64, wxChar* sign) {
2027   // the conversion is done by multiplying the individual digits
2028   // in reverse order to the corresponding power of 10
2029   //
2030   //  10's power:  987654321.9876543210
2031   //
2032   // LLONG_MAX:     9223372036854775807
2033   // LLONG_MIN:    -9223372036854775808
2034   // ULLONG_MAX:   18446744073709551615
2035   //
2036   // the function does not take into account the sign: only a
2037   // unsigned long long int is returned
2038 
2039     int maxDigits = 20;       // 20 + 1 (for the sign)
2040 
2041     wxUint64 power10[] = {
2042     wxULL(1),
2043     wxULL(10),
2044     wxULL(100),
2045     wxULL(1000),
2046     wxULL(10000),
2047     wxULL(100000),
2048     wxULL(1000000),
2049     wxULL(10000000),
2050     wxULL(100000000),
2051     wxULL(1000000000),
2052     wxULL(10000000000),
2053     wxULL(100000000000),
2054     wxULL(1000000000000),
2055     wxULL(10000000000000),
2056     wxULL(100000000000000),
2057     wxULL(1000000000000000),
2058     wxULL(10000000000000000),
2059     wxULL(100000000000000000),
2060     wxULL(1000000000000000000),
2061     wxULL(10000000000000000000)
2062   };
2063 
2064 
2065     wxUint64 temp1 = wxULL(0);   // the temporary converted integer
2066 
2067     int strLen = str.length();
2068     if (strLen == 0)  {
2069         // an empty string is converted to a ZERO value: the function succeeds
2070         *ui64 = wxLL(0);
2071         return true;
2072     }
2073 
2074     int index = 0;
2075     wxChar ch = str[0];
2076     if (ch == '+' || ch == '-')  {
2077         *sign = ch;
2078         ++index;
2079         ++maxDigits;
2080     }
2081 
2082     if (strLen > maxDigits)  {
2083         return false;
2084     }
2085 
2086     // check the overflow: check the string length and the individual digits
2087     // of the string; the overflow is checked for unsigned long long
2088     if (strLen == maxDigits)  {
2089         wxString uLongMax(_T("18446744073709551615"));
2090         int j = 0;
2091         for (int i = index; i < strLen - 1; i++)  {
2092             ch = str[i];
2093             if (ch < '0' || ch > '9') {
2094                 return false;
2095             }
2096             if (ch > uLongMax[j]) {
2097                 return false;
2098             }
2099             if (ch < uLongMax[j]) {
2100                 break;
2101             }
2102             ++j;
2103         }
2104     }
2105 
2106     // get the digits in the reverse order and multiply them by the
2107     // corresponding power of 10
2108     int exponent = 0;
2109     for (int i = strLen - 1; i >= index; i--)   {
2110         wxChar ch = str[i];
2111         if (ch < '0' || ch > '9') {
2112             return false;
2113         }
2114         ch = ch - '0';
2115         // compute the new temporary value
2116         temp1 += ch * power10[exponent];
2117         ++exponent;
2118     }
2119     *ui64 = temp1;
2120     return true;
2121 }
2122 
2123 #endif       // defined(wxJSON_64BIT_INT)
2124 
2125 /*
2126 {
2127 }
2128 */
2129 
2130 
2131 
2132