1 /////////////////////////////////////////////////////////////////////////////
2 // Name: jsonreader.cpp
3 // Purpose: the wxJSONReader class: a JSON text parser
4 // Author: Luciano Cattani
5 // Created: 2007/10/14
6 // RCS-ID: $Id: jsonreader.cpp,v 1.12 2008/03/12 10:48:19 luccat Exp $
7 // Copyright: (c) 2007 Luciano Cattani
8 // Licence: wxWidgets licence
9 /////////////////////////////////////////////////////////////////////////////
10
11 #ifdef NDEBUG
12 // make wxLogTrace a noop if no debug set, it's really slow
13 // must be defined before including debug.h
14 #define wxDEBUG_LEVEL 0
15 #endif
16
17 #include <wx/jsonreader.h>
18
19 #include <wx/mstream.h>
20 #include <wx/sstream.h>
21 #include <wx/debug.h>
22 #include <wx/log.h>
23
24
25
26 /*! \class wxJSONReader
27 \brief The JSON parser
28
29 The class is a JSON parser which reads a JSON formatted text and stores
30 values in the \c wxJSONValue structure.
31 The ctor accepts two parameters: the \e style flag, which controls how
32 much error-tolerant should the parser be and an integer which is
33 the maximum number of errors and warnings that have to be reported
34 (the default is 30).
35
36 If the JSON text document does not contain an open/close JSON character the
37 function returns an \b invalid value object; in other words, the
38 wxJSONValue::IsValid() function returns FALSE.
39 This is the case of a document that is empty or contains only
40 whitespaces or comments.
41 If the document contains a starting object/array character immediately
42 followed by a closing object/array character
43 (i.e.: \c {} ) then the function returns an \b empty array or object
44 JSON value.
45 This is a valid JSON object of type wxJSONTYPE_OBJECT or wxJSONTYPE_ARRAY
46 whose wxJSONValue::Size() function returns ZERO.
47
48 \par JSON text
49
50 The wxJSON parser just skips all characters read from the
51 input JSON text until the start-object '{' or start-array '[' characters
52 are encountered (see the GetStart() function).
53 This means that the JSON input text may contain anything
54 before the first start-object/array character except these two chars themselves
55 unless they are included in a C/C++ comment.
56 Comment lines that appear before the first start array/object character
57 are not ignored if the parser is constructed with the wxJSONREADER_STORE_COMMENTS
58 flag: they are added to the comment's array of the root JSON value.
59
60 Note that the parsing process stops when the internal DoRead() function
61 returns. Because that function is recursive, the top-level close-object
62 '}' or close-array ']' character cause the top-level DoRead() function
63 to return thus stopping the parsing process regardless the EOF condition.
64 This means that the JSON input text may contain anything \b after
65 the top-level close-object/array character.
66 Here are some examples:
67
68 Returns a wxJSONTYPE_INVALID value (invalid JSON value)
69 \code
70 // this text does not contain an open array/object character
71 \endcode
72
73 Returns a wxJSONTYPE_OBJECT value of Size() = 0
74 \code
75 {
76 }
77 \endcode
78
79 Returns a wxJSONTYPE_ARRAY value of Size() = 0
80 \code
81 [
82 ]
83 \endcode
84
85 Text before and after the top-level open/close characters is ignored.
86 \code
87 This non-JSON text does not cause the parser to report errors or warnings
88 {
89 }
90 This non-JSON text does not cause the parser to report errors or warnings
91 \endcode
92
93
94 \par Extensions
95
96 The wxJSON parser recognizes all JSON text plus some extensions
97 that are not part of the JSON syntax but that many other JSON
98 implementations do recognize.
99 If the input text contains the following non-JSON text, the parser
100 reports the situation as \e warnings and not as \e errors unless
101 the parser object was constructed with the wxJSONREADER_STRICT
102 flag. In the latter case the wxJSON parser is not tolerant.
103
104 \li C/C++ comments: the parser recognizes C and C++ comments.
105 Comments can optionally be stored in the value they refer
106 to and can also be written back to the JSON text document.
107 To know more about comment storage see \ref wxjson_comments
108
109 \li case tolerance: JSON syntax states that the literals \c null,
110 \c true and \c false must be lowercase; the wxJSON parser
111 also recognizes mixed case literals such as, for example,
112 \b Null or \b FaLSe. A \e warning is emitted.
113
114 \li wrong or missing closing character: wxJSON parser is tolerant
115 about the object / array closing character. When an open-array
116 character '[' is encountered, the parser expects the
117 corresponding close-array character ']'. If the character
118 encountered is a close-object char '}' a warning is reported.
119 A warning is also reported if the character is missing when
120 the end-of-file is reached.
121
122 \li multi-line strings: this feature allows a JSON string type to be
123 split in two or more lines as in the standard C/C++
124 languages. The drawback is that this feature is error-prone
125 and you have to use it with care.
126 For more info about this topic read \ref wxjson_tutorial_style_split
127
128 Note that you can control how much error-tolerant should the parser be
129 and also you can specify how many and what extensions are recognized.
130 See the constructor's parameters for more details.
131
132 \par Unicode vs ANSI
133
134 The parser can read JSON text from two very different kind of objects:
135
136 \li a string object (\b wxString)
137 \li a stream object (\b wxInputStream)
138
139 When the input is from a string object, the character represented in the
140 string is platform- and mode-dependent; in other words, characters are
141 represented differently: in ANSI builds they depend on the charset in use
142 and in Unicode builds they depend on the platform (UCS-2 on win32, UCS-4
143 or UTF-8 on GNU/Linux).
144
145 When the input is from a stream object, the only recognized encoding format
146 is UTF-8 for both ANSI and Unicode builds.
147
148 \par Example:
149
150 \code
151 wxJSONValue value;
152 wxJSONReader reader;
153
154 // open a text file that contains the UTF-8 encoded JSON text
155 wxFFileInputStream jsonText( _T("filename.utf8"), _T("r"));
156
157 // read the file
158 int numErrors = reader.Parse( jsonText, &value );
159
160 if ( numErrors > 0 ) {
161 ::MessageBox( _T("Error reading the input file"));
162 }
163 \endcode
164
165 Starting from version 1.1.0 the wxJSON reader and the writer have changed in
166 their internal organization.
167 To know more about ANSI and Unicode mode read \ref wxjson_tutorial_unicode.
168 */
169
170
171
// Trace masks passed to wxLogTrace() by the reader.
// If you have the debug build of both wxWidgets and wxJSON you can see
// the trace messages by setting the environment variable:
//     WXTRACE=traceReader StoreComment
// The masks are only compiled when wxDEBUG_LEVEL is greater than ZERO.
#if wxDEBUG_LEVEL > 0
static const wxChar* traceMask      = _T("traceReader");
static const wxChar* storeTraceMask = _T("StoreComment");
#endif
180
181 //! Ctor
182 /*!
183 Construct a JSON parser object with the given parameters.
184
185 JSON parser objects should always be constructed on the stack but
186 it does not hurt to have a global JSON parser.
187
188 \param flags this parameter controls how error-tolerant the
189 parser should be
190
191 \param maxErrors the maximum number of errors (and warnings, too) that are
192 reported by the parser. When the number of errors reaches this limit,
193 the parser stops to read the JSON input text and no other error is
194 reported.
195
196 The \c flags parameter is the combination of ZERO or more of the
197 following constants OR'ed together:
198
199 \li wxJSONREADER_ALLOW_COMMENTS: C/C++ comments are recognized by the
200 parser; a warning is reported by the parser
201 \li wxJSONREADER_STORE_COMMENTS: C/C++ comments, if recognized, are
202 stored in the value they refer to and can be rewritten back to
203 the JSON text
204 \li wxJSONREADER_CASE: the parser recognizes mixed-case literal strings
205 \li wxJSONREADER_MISSING: the parser allows missing or wrong close-object
206 and close-array characters
207 \li wxJSONREADER_MULTISTRING: strings may be split in two or more
208 lines
209 \li wxJSONREADER_COMMENTS_AFTER: if STORE_COMMENTS is defined, the parser
210 assumes that comment lines appear \b before the value they
211 refer to unless this constant is specified. In the latter case,
212 comments appear \b after the value they refer to.
213 \li wxJSONREADER_NOUTF8_STREAM: suppress UTF-8 conversion when reading a
214 string value from a stream: the reader assumes that the input stream
215 is encoded in ANSI format and not in UTF-8; only meaningful in ANSI
216 builds, this flag is simply ignored in Unicode builds.
217
218 You can also use the following shortcuts to specify some predefined
219 flag's combinations:
220
221 \li wxJSONREADER_STRICT: all wxJSON extensions are reported as errors, this
222 is the same as specifying a ZERO value as \c flags.
223 \li wxJSONREADER_TOLERANT: this is the same as ALLOW_COMMENTS | CASE |
224 MISSING | MULTISTRING; all wxJSON extensions are turned on but comments
225 are not stored in the value objects.
226
227 \par Example:
228
229 The following code fragment constructs a JSON parser, turns on all
230 wxJSON extensions and also stores C/C++ comments in the value object
231 they refer to. The parser assumes that the comments appear before the
232 value:
233
234 \code
235 wxJSONReader reader( wxJSONREADER_TOLERANT | wxJSONREADER_STORE_COMMENTS );
236 wxJSONValue root;
237 int numErrors = reader.Parse( jsonText, &root );
238 \endcode
239 */
wxJSONReader(int flags,int maxErrors)240 wxJSONReader::wxJSONReader( int flags, int maxErrors )
241 {
242 m_flags = flags;
243 m_maxErrors = maxErrors;
244 m_noUtf8 = false;
245 #if !defined( wxJSON_USE_UNICODE )
246 // in ANSI builds we can suppress UTF-8 conversion for both the writer and the reader
247 if ( m_flags & wxJSONREADER_NOUTF8_STREAM ) {
248 m_noUtf8 = true;
249 }
250 #endif
251
252 }
253
254 //! Dtor - does nothing
~wxJSONReader()255 wxJSONReader::~wxJSONReader()
256 {
257 }
258
259 //! Parse the JSON document.
260 /*!
261 The two overloaded versions of the \c Parse() function read a
262 JSON text stored in a wxString object or in a wxInputStream
263 object.
264
265 If \c val is a NULL pointer, the function does not store the
266 values: it can be used as a JSON checker in order to check the
267 syntax of the document.
268 Returns the number of \b errors found in the document.
269 If the returned value is ZERO and the parser was constructed
270 with the \c wxJSONREADER_STRICT flag, then the parsed document
271 is \e well-formed and it only contains valid JSON text.
272
273 If the \c wxJSONREADER_TOLERANT flag was used in the parser's
274 constructor, then a return value of ZERO
275 does not mean that the document is \e well-formed because it may
276 contain comments and other extensions that are not fatal for the
277 wxJSON parser but other parsers may fail to recognize.
278 You can use the \c GetWarningCount() function to know how many
279 wxJSON extensions are present in the JSON input text.
280
281 Note that the JSON value object \c val is not cleared by this
282 function unless its type is of the wrong type.
283 In other words, if \c val is of type wxJSONTYPE_ARRAY and it already
284 contains 10 elements and the input document starts with a
285 '[' (open-array char) then the elements read from the document are
286 \b appended to the existing ones.
287
288 On the other hand, if the text document starts with a '{' (open-object) char
289 then this function must change the type of the \c val object to
290 \c wxJSONTYPE_OBJECT and the old content of 10 array elements will be lost.
291
292 \par Different input types
293
294 The real parsing process is done using UTF-8 streams. If the input is
295 from a \b wxString object, the Parse function first converts the input string
296 into a temporary \b wxMemoryInputStream which contains the UTF-8 conversion
297 of the string itself.
298 Next, the overloaded Parse function is called.
299
300 @param doc the JSON text that has to be parsed
301 @param val the wxJSONValue object that contains the parsed text; if NULL the
302 parser does not store anything but errors and warnings are reported
303 @return the total number of errors encountered
304 */
305 int
Parse(const wxString & doc,wxJSONValue * val)306 wxJSONReader:: Parse( const wxString& doc, wxJSONValue* val )
307 {
308 #if !defined( wxJSON_USE_UNICODE )
309 // in ANSI builds input from a string never use UTF-8 conversion
310 bool noUtf8_bak = m_noUtf8; // save the current setting
311 m_noUtf8 = true;
312 #endif
313
314 // convert the string to a UTF-8 / ANSI memory stream and calls overloaded Parse()
315 char* readBuff = 0;
316 wxCharBuffer utf8CB = doc.ToUTF8(); // the UTF-8 buffer
317 #if !defined( wxJSON_USE_UNICODE )
318 wxCharBuffer ansiCB( doc.c_str()); // the ANSI buffer
319 if ( m_noUtf8 ) {
320 readBuff = ansiCB.data();
321 }
322 else {
323 readBuff = utf8CB.data();
324 }
325 #else
326 readBuff = utf8CB.data();
327 #endif
328
329 // now construct the temporary memory input stream
330 size_t len = strlen( readBuff );
331 wxMemoryInputStream is( readBuff, len );
332
333 int numErr = Parse( is, val );
334 #if !defined( wxJSON_USE_UNICODE )
335 m_noUtf8 = noUtf8_bak;
336 #endif
337 return numErr;
338 }
339
340 //! \overload Parse( const wxString&, wxJSONValue* )
341 int
Parse(wxInputStream & is,wxJSONValue * val)342 wxJSONReader::Parse( wxInputStream& is, wxJSONValue* val )
343 {
344 // if val == 0 the 'temp' JSON value will be passed to DoRead()
345 wxJSONValue temp;
346 m_level = 0;
347 m_depth = 0;
348 m_lineNo = 1;
349 m_colNo = 1;
350 m_peekChar = -1;
351 m_errors.clear();
352 m_warnings.clear();
353
354 // if a wxJSONValue is not passed to the Parse function
355 // we set the temparary object created on the stack
356 // I know this will slow down the validation of input
357 if ( val == 0 ) {
358 val = &temp;
359 }
360 wxASSERT( val );
361
362 // set the wxJSONValue object's pointers for comment storage
363 m_next = val;
364 m_next->SetLineNo( -1 );
365 m_lastStored = 0;
366 m_current = 0;
367
368 int ch = GetStart( is );
369 switch ( ch ) {
370 case '{' :
371 val->SetType( wxJSONTYPE_OBJECT );
372 break;
373 case '[' :
374 val->SetType( wxJSONTYPE_ARRAY );
375 break;
376 default :
377 AddError( _T("Cannot find a start object/array character" ));
378 return m_errors.size();
379 break;
380 }
381
382 // returning from DoRead() could be for EOF or for
383 // the closing array-object character
384 // if -1 is returned, it is as an error because the lack
385 // of close-object/array characters
386 // note that the missing close-chars error messages are
387 // added by the DoRead() function
388 ch = DoRead( is, *val );
389 return m_errors.size();
390 }
391
392
393 //! Returns the start of the document
394 /*!
395 This is the first function called by the Parse() function and it searches
396 the input stream for the starting character of a JSON text and returns it.
397 JSON text start with '{' or '['.
398 If the two starting characters are inside a C/C++ comment, they
399 are ignored.
400 Returns the JSON-text start character or -1 on EOF.
401
402 @param is the input stream that contains the JSON text
403 @return -1 on errors or EOF; one of '{' or '['
404 */
405 int
GetStart(wxInputStream & is)406 wxJSONReader::GetStart( wxInputStream& is )
407 {
408 int ch = 0;
409 do {
410 switch ( ch ) {
411 case 0 :
412 ch = ReadChar( is );
413 break;
414 case '{' :
415 return ch;
416 break;
417 case '[' :
418 return ch;
419 break;
420 case '/' :
421 ch = SkipComment( is );
422 StoreComment( 0 );
423 break;
424 default :
425 ch = ReadChar( is );
426 break;
427 }
428 } while ( ch >= 0 );
429 return ch;
430 }
431
432 //! Return a reference to the error message's array.
433 const wxArrayString&
GetErrors() const434 wxJSONReader::GetErrors() const
435 {
436 return m_errors;
437 }
438
439 //! Return a reference to the warning message's array.
440 const wxArrayString&
GetWarnings() const441 wxJSONReader::GetWarnings() const
442 {
443 return m_warnings;
444 }
445
446 //! Return the depth of the JSON input text
447 /*!
448 The function returns the number of times the recursive \c DoRead function was
449 called in the parsing process thus returning the maximum depth of the JSON
450 input text.
451 */
452 int
GetDepth() const453 wxJSONReader::GetDepth() const
454 {
455 return m_depth;
456 }
457
458
459
460 //! Return the size of the error message's array.
461 int
GetErrorCount() const462 wxJSONReader::GetErrorCount() const
463 {
464 return m_errors.size();
465 }
466
467 //! Return the size of the warning message's array.
468 int
GetWarningCount() const469 wxJSONReader::GetWarningCount() const
470 {
471 return m_warnings.size();
472 }
473
474
475 //! Read a character from the input JSON document.
476 /*!
477 The function returns the next byte from the UTF-8 stream as an INT.
478 In case of errors or EOF, the function returns -1.
479 The function also updates the \c m_lineNo and \c m_colNo data
480 members and converts all CR+LF sequence in LF.
481
482 This function only returns one byte UTF-8 (one code unit)
483 at a time and not Unicode code points.
484 The only reason for this function is to process line and column
485 numbers.
486
487 @param is the input stream that contains the JSON text
488 @return the next char (one single byte) in the input stream or -1 on error or EOF
489 */
490 int
ReadChar(wxInputStream & is)491 wxJSONReader::ReadChar( wxInputStream& is )
492 {
493 if ( is.Eof()) {
494 return -1;
495 }
496
497 unsigned char ch = is.GetC();
498 size_t last = is.LastRead(); // returns ZERO if EOF
499 if ( last == 0 ) {
500 return -1;
501 }
502
503 // the function also converts CR in LF. only LF is returned
504 // in the case of CR+LF
505 int nextChar;
506
507 if ( ch == '\r' ) {
508 m_colNo = 1;
509 nextChar = PeekChar( is );
510 if ( nextChar == -1 ) {
511 return -1;
512 }
513 else if ( nextChar == '\n' ) {
514 ch = is.GetC();
515 }
516 }
517 if ( ch == '\n' ) {
518 ++m_lineNo;
519 m_colNo = 1;
520 }
521 else {
522 ++m_colNo;
523 }
524 return (int) ch;
525 }
526
527
528 //! Peek a character from the input JSON document
529 /*!
530 This function just calls the \b Peek() function on the stream
531 and returns it.
532
533 @param is the input stream that contains the JSON text
534 @return the next char (one single byte) in the input stream or -1 on error or EOF
535 */
536 int
PeekChar(wxInputStream & is)537 wxJSONReader::PeekChar( wxInputStream& is )
538 {
539 int ch = -1; unsigned char c;
540 if ( !is.Eof()) {
541 c = is.Peek();
542 ch = c;
543 }
544 return ch;
545 }
546
547
548 //! Reads the JSON text document (internal use)
549 /*!
550 This is a recursive function that is called by \c Parse()
551 and by the \c DoRead() function itself when a new object /
552 array character is encontered.
553 The function returns when a EOF condition is encontered or
554 when the corresponding close-object / close-array char is encontered.
555 The function also increments the \c m_level
556 data member when it is entered and decrements it on return.
557 It also sets \c m_depth equal to \c m_level if \c m_depth is
558 less than \c m_level.
559
560 The function is the heart of the wxJSON parser class but it is
561 also very easy to understand because JSON syntax is very
562 easy.
563
564 Returns the last close-object/array character read or -1 on EOF.
565
566 @param is the input stream that contains the JSON text
567 @param parent the JSON value object that is the parent of all subobjects
568 read by the function until the next close-object/array (for
569 the top-level \c DoRead function \c parent is the root JSON object)
570 @return one of close-array or close-object char or -1 on error or EOF
571 */
572 int
DoRead(wxInputStream & is,wxJSONValue & parent)573 wxJSONReader::DoRead( wxInputStream& is, wxJSONValue& parent )
574 {
575 ++m_level;
576 if ( m_depth < m_level ) {
577 m_depth = m_level;
578 }
579
580 // 'value' is the wxJSONValue structure that has to be
581 // read. Data read from the JSON text input is stored
582 // in the following object.
583 wxJSONValue value( wxJSONTYPE_INVALID );
584
585 // sets the pointers to the current, next and last-stored objects
586 // in order to determine the value to which a comment refers to
587 m_next = &value;
588 m_current = &parent;
589 m_current->SetLineNo( m_lineNo );
590 m_lastStored = 0;
591
592 // the 'key' string is stored from 'value' when a ':' is encontered
593 wxString key;
594
595 // the character read: -1=EOF, 0=to be read
596 int ch=0;
597
598 do { // we read until ch < 0
599 switch ( ch ) {
600 case 0 :
601 ch = ReadChar( is );
602 break;
603 case ' ' :
604 case '\t' :
605 case '\n' :
606 case '\r' :
607 ch = SkipWhiteSpace( is );
608 break;
609 case -1 : // the EOF
610 break;
611 case '/' :
612 ch = SkipComment( is );
613 StoreComment( &parent );
614 break;
615
616 case '{' :
617 if ( parent.IsObject() ) {
618 if ( key.empty() ) {
619 AddError( _T("\'{\' is not allowed here (\'name\' is missing") );
620 }
621 if ( value.IsValid() ) {
622 AddError( _T("\'{\' cannot follow a \'value\'") );
623 }
624 }
625 else if ( parent.IsArray() ) {
626 if ( value.IsValid() ) {
627 AddError( _T("\'{\' cannot follow a \'value\' in JSON array") );
628 }
629 }
630 else {
631 wxJSON_ASSERT( 0 ); // always fails
632 }
633
634 // the openobject char cause the DoRead() to be called recursively
635 value.SetType( wxJSONTYPE_OBJECT );
636 ch = DoRead( is, value );
637 break;
638
639 case '}' :
640 if ( !parent.IsObject() ) {
641 AddWarning( wxJSONREADER_MISSING,
642 _T("Trying to close an array using the \'}\' (close-object) char" ));
643 }
644 // close-object: store the current value, if any
645 StoreValue( ch, key, value, parent );
646 m_current = &parent;
647 m_next = 0;
648 m_current->SetLineNo( m_lineNo );
649 ch = ReadChar( is );
650 return ch;
651 break;
652
653 case '[' :
654 if ( parent.IsObject() ) {
655 if ( key.empty() ) {
656 AddError( _T("\'[\' is not allowed here (\'name\' is missing") );
657 }
658 if ( value.IsValid() ) {
659 AddError( _T("\'[\' cannot follow a \'value\' text") );
660 }
661 }
662 else if ( parent.IsArray()) {
663 if ( value.IsValid() ) {
664 AddError( _T("\'[\' cannot follow a \'value\'") );
665 }
666 }
667 else {
668 wxJSON_ASSERT( 0 ); // always fails
669 }
670 // open-array cause the DoRead() to be called recursively
671 value.SetType( wxJSONTYPE_ARRAY );
672 ch = DoRead( is, value );
673 break;
674
675 case ']' :
676 if ( !parent.IsArray() ) {
677 // wrong close-array char (should be close-object)
678 AddWarning( wxJSONREADER_MISSING,
679 _T("Trying to close an object using the \']\' (close-array) char" ));
680 }
681 StoreValue( ch, key, value, parent );
682 m_current = &parent;
683 m_next = 0;
684 m_current->SetLineNo( m_lineNo );
685 return 0; // returning ZERO for reading the next char
686 break;
687
688 case ',' :
689 // store the value, if any
690 StoreValue( ch, key, value, parent );
691 key.clear();
692 ch = ReadChar( is );
693 break;
694
695 case '\"' :
696 ch = ReadString( is, value ); // read a JSON string type
697 m_current = &value;
698 m_next = 0;
699 break;
700
701 case '\'' :
702 ch = ReadMemoryBuff( is, value ); // read a memory buffer type
703 m_current = &value;
704 m_next = 0;
705 break;
706
707 case ':' : // key / value separator
708 m_current = &value;
709 m_current->SetLineNo( m_lineNo );
710 m_next = 0;
711 if ( !parent.IsObject() ) {
712 AddError( _T( "\':\' can only used in object's values" ));
713 }
714 else if ( !value.IsString() ) {
715 AddError( _T( "\':\' follows a value which is not of type \'string\'" ));
716 }
717 else if ( !key.empty() ) {
718 AddError( _T( "\':\' not allowed where a \'name\' string was already available" ));
719 }
720 else {
721 // the string in 'value' is set as the 'key'
722 key = value.AsString();
723 value.SetType( wxJSONTYPE_INVALID );
724 }
725 ch = ReadChar( is );
726 break;
727
728 default :
729 // no special char: it is a literal or a number
730 // errors are checked in the 'ReadValue()' function.
731 m_current = &value;
732 m_current->SetLineNo( m_lineNo );
733 m_next = 0;
734 ch = ReadValue( is, ch, value );
735 break;
736 } // end switch
737 } while ( ch >= 0 );
738
739 // the DoRead() should return when the close-object/array char is encontered
740 // if we are here, the EOF condition was encontered so one or more close-something
741 // characters are missing
742 if ( parent.IsArray() ) {
743 AddWarning( wxJSONREADER_MISSING, _T("\']\' missing at end of file"));
744 }
745 else if ( parent.IsObject() ) {
746 AddWarning( wxJSONREADER_MISSING, _T("\'}\' missing at end of file"));
747 }
748 else {
749 wxJSON_ASSERT( 0 );
750 }
751
752 // we store the value, as there is a missing close-object/array char
753 StoreValue( ch, key, value, parent );
754
755 --m_level;
756 return ch;
757 }
758
759 //! Store a value in the parent object.
760 /*!
761 The function is called by \c DoRead() when a the comma
762 or a close-object/array character is encontered and stores the current
763 value read by the parser in the parent object.
764 The function checks that \c value is not invalid and that \c key is
765 not an empty string if \c parent is an object.
766
767 \param ch the character read: a comma or close objecty/array char
768 \param key the \b key string: must be empty if \c parent is an array
769 \param value the current JSON value to be stored in \c parent
770 \param parent the JSON value that is the parent of \c value.
771 \return none
772 */
773 void
StoreValue(int ch,const wxString & key,wxJSONValue & value,wxJSONValue & parent)774 wxJSONReader::StoreValue( int ch, const wxString& key, wxJSONValue& value, wxJSONValue& parent )
775 {
776 // if 'ch' == } or ] than value AND key may be empty when a open object/array
777 // is immediatly followed by a close object/array
778 //
779 // if 'ch' == , (comma) value AND key (for TypeMap) cannot be empty
780 //
781 wxLogTrace( traceMask, _T("(%s) ch=%d char=%c"), __PRETTY_FUNCTION__, ch, (char) ch);
782 wxLogTrace( traceMask, _T("(%s) value=%s"), __PRETTY_FUNCTION__, value.AsString().c_str());
783
784 m_current = 0;
785 m_next = &value;
786 m_lastStored = 0;
787 m_next->SetLineNo( -1 );
788
789 if ( !value.IsValid() && key.empty() ) {
790 // OK, if the char read is a close-object or close-array
791 if ( ch == '}' || ch == ']' ) {
792 m_lastStored = 0;
793 wxLogTrace( traceMask, _T("(%s) key and value are empty, returning"),
794 __PRETTY_FUNCTION__);
795 }
796 else {
797 AddError( _T("key or value is missing for JSON value"));
798 }
799 }
800 else {
801 // key or value are not empty
802 if ( parent.IsObject() ) {
803 if ( !value.IsValid() ) {
804 AddError( _T("cannot store the value: \'value\' is missing for JSON object type"));
805 }
806 else if ( key.empty() ) {
807 AddError( _T("cannot store the value: \'key\' is missing for JSON object type"));
808 }
809 else {
810 // OK, adding the value to parent key/value map
811 wxLogTrace( traceMask, _T("(%s) adding value to key:%s"),
812 __PRETTY_FUNCTION__, key.c_str());
813 parent[key] = value;
814 m_lastStored = &(parent[key]);
815 m_lastStored->SetLineNo( m_lineNo );
816 }
817 }
818 else if ( parent.IsArray() ) {
819 if ( !value.IsValid() ) {
820 AddError( _T("cannot store the item: \'value\' is missing for JSON array type"));
821 }
822 if ( !key.empty() ) {
823 AddError( _T("cannot store the item: \'key\' (\'%s\') is not permitted in JSON array type"), key);
824 }
825 wxLogTrace( traceMask, _T("(%s) appending value to parent array"),
826 __PRETTY_FUNCTION__ );
827 parent.Append( value );
828 const wxJSONInternalArray* arr = parent.AsArray();
829 wxJSON_ASSERT( arr );
830 m_lastStored = &(arr->Last());
831 m_lastStored->SetLineNo( m_lineNo );
832 }
833 else {
834 wxJSON_ASSERT( 0 ); // should never happen
835 }
836 }
837 value.SetType( wxJSONTYPE_INVALID );
838 value.ClearComments();
839 }
840
841 //! Add a error message to the error's array
842 /*!
843 The overloaded versions of this function add an error message to the
844 error's array stored in \c m_errors.
845 The error message is formatted as follows:
846
847 \code
848 Error: line xxx, col xxx - <error_description>
849 \endcode
850
851 The \c msg parameter is the description of the error; line's and column's
852 number are automatically added by the functions.
853 The \c fmt parameter is a format string that has the same syntax as the \b printf
854 function.
855 Note that it is the user's responsability to provide a format string suitable
856 with the arguments: another string or a character.
857 */
858 void
AddError(const wxString & msg)859 wxJSONReader::AddError( const wxString& msg )
860 {
861 wxString err;
862 err.Printf( _T("Error: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str() );
863
864 wxLogTrace( traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
865
866 if ( (int) m_errors.size() < m_maxErrors ) {
867 m_errors.Add( err );
868 }
869 else if ( (int) m_errors.size() == m_maxErrors ) {
870 m_errors.Add( _T("ERROR: too many error messages - ignoring further errors"));
871 }
872 // else if ( m_errors > m_maxErrors ) do nothing, thus ignore the error message
873 }
874
875 //! \overload AddError( const wxString& )
876 void
AddError(const wxString & fmt,const wxString & str)877 wxJSONReader::AddError( const wxString& fmt, const wxString& str )
878 {
879 wxString s;
880 s.Printf( fmt.c_str(), str.c_str() );
881 AddError( s );
882 }
883
884 //! \overload AddError( const wxString& )
885 void
AddError(const wxString & fmt,wxChar c)886 wxJSONReader::AddError( const wxString& fmt, wxChar c )
887 {
888 wxString s;
889 s.Printf( fmt.c_str(), c );
890 AddError( s );
891 }
892
893 //! Add a warning message to the warning's array
894 /*!
895 The warning description is as follows:
896 \code
897 Warning: line xxx, col xxx - <warning_description>
898 \endcode
899
900 Warning messages are generated by the parser when the JSON
901 text that has been read is not well-formed but the
902 error is not fatal and the parser recognizes the text
903 as an extension to the JSON standard (see the parser's ctor
904 for more info about wxJSON extensions).
905
906 Note that the parser has to be constructed with a flag that
907 indicates if each individual wxJSON extension is on.
908 If the warning message is related to an extension that is not
909 enabled in the parser's \c m_flag data member, this function
910 calls AddError() and the warning message becomes an error
911 message.
912 The \c type parameter is one of the same constants that
913 specify the parser's extensions.
914 If type is ZERO than the function always adds a warning
915 */
916 void
AddWarning(int type,const wxString & msg)917 wxJSONReader::AddWarning( int type, const wxString& msg )
918 {
919 // if 'type' AND 'm_flags' == 1 than the extension is
920 // ON. Otherwise it is OFF anf the function calls AddError()
921 if ( type != 0 ) {
922 if ( ( type & m_flags ) == 0 ) {
923 AddError( msg );
924 return;
925 }
926 }
927
928 wxString err;
929 err.Printf( _T( "Warning: line %d, col %d - %s"), m_lineNo, m_colNo, msg.c_str() );
930
931 wxLogTrace( traceMask, _T("(%s) %s"), __PRETTY_FUNCTION__, err.c_str());
932 if ( (int) m_warnings.size() < m_maxErrors ) {
933 m_warnings.Add( err );
934 }
935 else if ( (int) m_warnings.size() == m_maxErrors ) {
936 m_warnings.Add( _T("Error: too many warning messages - ignoring further warnings"));
937 }
938 // else do nothing, thus ignore the warning message
939 }
940
941 //! Skip all whitespaces.
942 /*!
943 The function reads characters from the input text
944 and returns the first non-whitespace character read or -1
945 if EOF.
946 Note that the function does not rely on the \b isspace function
947 of the C library but checks the space constants: space, TAB and
948 LF.
949 */
950 int
SkipWhiteSpace(wxInputStream & is)951 wxJSONReader::SkipWhiteSpace( wxInputStream& is )
952 {
953 // just read one byte at a time and check for whitespaces
954 int ch;
955 do {
956 ch = ReadChar( is );
957 if ( ch < 0 ) {
958 break;
959 }
960 }
961 while ( ch == ' ' || ch == '\n' || ch == '\t' );
962 wxLogTrace( traceMask, _T("(%s) end whitespaces line=%d col=%d"),
963 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
964 return ch;
965 }
966
967 //! Skip a comment
968 /*!
969 The function is called by DoRead() when a '/' (slash) character
970 is read from the input stream assuming that a C/C++ comment is starting.
971 Returns the first character that follows the comment or
972 -1 on EOF.
973 The function also adds a warning message because comments are not
974 valid JSON text.
975 The function also stores the comment, if any, in the \c m_comment data
976 member: it can be used by the DoRead() function if comments have to be
977 stored in the value they refer to.
978 */
979 int
SkipComment(wxInputStream & is)980 wxJSONReader::SkipComment( wxInputStream& is )
981 {
982 static const wxChar* warn =
983 _T("Comments may be tolerated in JSON text but they are not part of JSON syntax");
984
985 // if it is a comment, then a warning is added to the array
986 // otherwise it is an error: values cannot start with a '/'
987 // read the char next to the first slash
988 int ch = ReadChar( is );
989 if ( ch < 0 ) {
990 return -1;
991 }
992
993 wxLogTrace( storeTraceMask, _T("(%s) start comment line=%d col=%d"),
994 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
995
996 // the temporary UTF-8/ANSI buffer that holds the comment string. This will be
997 // converted to a wxString object using wxString::FromUTF8() or From8BitData()
998 wxMemoryBuffer utf8Buff;
999 unsigned char c;
1000
1001 if ( ch == '/' ) { // C++ comment, read until end-of-line
1002 // C++ comment strings are in UTF-8 format. we store all
1003 // UTF-8 code units until the first LF or CR+LF
1004 AddWarning( wxJSONREADER_ALLOW_COMMENTS, warn );
1005 m_commentLine = m_lineNo;
1006 utf8Buff.AppendData( "//", 2 );
1007
1008 while ( ch >= 0 ) {
1009 if ( ch == '\n' ) {
1010 break;
1011 }
1012 if ( ch == '\r' ) {
1013 ch = PeekChar( is );
1014 if ( ch == '\n' ) {
1015 ch = ReadChar( is );
1016 }
1017 break;
1018 }
1019 else {
1020 // store the char in the UTF8 temporary buffer
1021 c = (unsigned char) ch;
1022 utf8Buff.AppendByte( c );
1023 }
1024 ch = ReadChar( is );
1025 }
1026 // now convert the temporary UTF-8 buffer
1027 m_comment = wxString::FromUTF8( (const char*) utf8Buff.GetData(),
1028 utf8Buff.GetDataLen());
1029 }
1030
1031 // check if a C-style comment
1032 else if ( ch == '*' ) { // C-style comment
1033 AddWarning(wxJSONREADER_ALLOW_COMMENTS, warn );
1034 m_commentLine = m_lineNo;
1035 utf8Buff.AppendData( "/*", 2 );
1036 while ( ch >= 0 ) {
1037 // check the END-COMMENT chars ('*/')
1038 if ( ch == '*' ) {
1039 ch = PeekChar( is );
1040 if ( ch == '/' ) {
1041 ch = ReadChar( is ); // read the '/' char
1042 ch = ReadChar( is ); // read the next char that will be returned
1043 utf8Buff.AppendData( "*/", 2 );
1044 break;
1045 }
1046 }
1047 // store the char in the UTF8 temporary buffer
1048 c = (unsigned char) ch;
1049 utf8Buff.AppendByte( c );
1050 ch = ReadChar( is );
1051 }
1052 // now convert the temporary buffer in a wxString object
1053 if ( m_noUtf8 ) {
1054 m_comment = wxString::From8BitData( (const char*) utf8Buff.GetData(),
1055 utf8Buff.GetDataLen());
1056 }
1057 else {
1058 m_comment = wxString::FromUTF8( (const char*) utf8Buff.GetData(),
1059 utf8Buff.GetDataLen());
1060 }
1061 }
1062
1063 else { // it is not a comment, return the character next the first '/'
1064 AddError( _T( "Strange '/' (did you want to insert a comment?)"));
1065 // we read until end-of-line OR end of C-style comment OR EOF
1066 // because a '/' should be a start comment
1067 while ( ch >= 0 ) {
1068 ch = ReadChar( is );
1069 if ( ch == '*' && PeekChar( is ) == '/' ) {
1070 break;
1071 }
1072 if ( ch == '\n' ) {
1073 break;
1074 }
1075 }
1076 // read the next char that will be returned
1077 ch = ReadChar( is );
1078 }
1079 wxLogTrace( traceMask, _T("(%s) end comment line=%d col=%d"),
1080 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1081 wxLogTrace( storeTraceMask, _T("(%s) end comment line=%d col=%d"),
1082 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1083 wxLogTrace( storeTraceMask, _T("(%s) comment=%s"),
1084 __PRETTY_FUNCTION__, m_comment.c_str());
1085 return ch;
1086 }
1087
1088 //! Read a string value
1089 /*!
1090 The function reads a string value from input stream and it is
1091 called by the \c DoRead() function when it enconters the
1092 double quote characters.
1093 The function read all bytes up to the next double quotes
1094 (unless it is escaped) and stores them in a temporary UTF-8
1095 memory buffer.
1096 Also, the function processes the escaped characters defined
1097 in the JSON syntax.
1098
1099 Next, the function tries to convert the UTF-8 buffer to a
1100 \b wxString object using the \b wxString::FromUTF8 function.
1101 Depending on the build mode, we can have the following:
1102 \li in Unicode the function always succeeds, provided that the
1103 buffer contains valid UTF-8 code units.
1104
1105 \li in ANSI builds the conversion may fail because of the presence of
1106 unrepresentable characters in the current locale. In this case,
1107 the default behaviour is to perform a char-by-char conversion; every
1108 char that cannot be represented in the current locale is stored as
1109 \e unicode \e escaped \e sequence
1110
1111 \li in ANSI builds, if the reader is constructed with the wxJSONREADER_NOUTF8_STREAM
1112 then no conversion takes place and the UTF-8 temporary buffer is simply
1113 \b copied to the \b wxString object
1114
1115 The string is, finally, stored in the provided wxJSONValue argument
1116 provided that it is empty or it contains a string value.
1117 This is because the parser class recognizes multi-line strings
1118 like the following one:
1119 \code
1120 [
1121 "This is a very long string value which is splitted into more"
1122 "than one line because it is more human readable"
1123 ]
1124 \endcode
1125 Because of the lack of the value separator (,) the parser
1126 assumes that the string was splitted into several double-quoted
1127 strings.
1128 If the value does not contain a string then an error is
1129 reported.
1130 Splitted strings cause the parser to report a warning.
1131 */
1132 int
ReadString(wxInputStream & is,wxJSONValue & val)1133 wxJSONReader::ReadString( wxInputStream& is, wxJSONValue& val )
1134 {
1135 // the char last read is the opening qoutes (")
1136
1137 wxMemoryBuffer utf8Buff;
1138 char ues[8]; // stores a Unicode Escaped Esquence: \uXXXX
1139
1140 int ch = 0;
1141 while ( ch >= 0 ) {
1142 ch = ReadChar( is );
1143 unsigned char c = (unsigned char) ch;
1144 if ( ch == '\\' ) { // an escape sequence
1145 ch = ReadChar( is );
1146 switch ( ch ) {
1147 case -1 : // EOF
1148 break;
1149 case 't' :
1150 utf8Buff.AppendByte( '\t' );
1151 break;
1152 case 'n' :
1153 utf8Buff.AppendByte( '\n' );
1154 break;
1155 case 'b' :
1156 utf8Buff.AppendByte( '\b' );
1157 break;
1158 case 'r' :
1159 utf8Buff.AppendByte( '\r' );
1160 break;
1161 case '\"' :
1162 utf8Buff.AppendByte( '\"' );
1163 break;
1164 case '\\' :
1165 utf8Buff.AppendByte( '\\' );
1166 break;
1167 case '/' :
1168 utf8Buff.AppendByte( '/' );
1169 break;
1170 case 'f' :
1171 utf8Buff.AppendByte( '\f' );
1172 break;
1173 case 'u' :
1174 ch = ReadUES( is, ues );
1175 if ( ch < 0 ) { // if EOF, returns
1176 return ch;
1177 }
1178 // append the escaped character to the UTF8 buffer
1179 AppendUES( utf8Buff, ues );
1180 // many thanks to Bryan Ashby who discovered this bug
1181 continue;
1182 // break;
1183 default :
1184 AddError( _T( "Unknow escaped character \'\\%c\'"), ch );
1185 }
1186 }
1187 else {
1188 // we have read a non-escaped character so we have to append it to
1189 // the temporary UTF-8 buffer until the next quote char
1190 if ( ch == '\"' ) {
1191 break;
1192 }
1193 utf8Buff.AppendByte( c );
1194 }
1195 }
1196
1197 // if UTF-8 conversion is disabled (ANSI builds only) we just copy the
1198 // bit data to a wxString object
1199 wxString s;
1200 if ( m_noUtf8 ) {
1201 s = wxString::From8BitData( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1202 }
1203 else {
1204 // perform UTF-8 conversion
1205 // first we check that the UTF-8 buffer is correct, i.e. it contains valid
1206 // UTF-8 code points.
1207 // this works in both ANSI and Unicode builds.
1208 size_t convLen = wxConvUTF8.ToWChar( 0, // wchar_t destination
1209 0, // size_t destLenght
1210 (const char*) utf8Buff.GetData(), // char_t source
1211 utf8Buff.GetDataLen()); // size_t sourceLenght
1212
1213 if ( convLen == wxCONV_FAILED ) {
1214 AddError( _T( "String value: the UTF-8 stream is invalid"));
1215 s.append( _T( "<UTF-8 stream not valid>"));
1216 }
1217 else {
1218 #if defined( wxJSON_USE_UNICODE )
1219 // in Unicode just convert to wxString
1220 s = wxString::FromUTF8( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1221 #else
1222 // in ANSI, the conversion may fail and an empty string is returned
1223 // in this case, the reader do a char-by-char conversion storing
1224 // unicode escaped sequences of unrepresentable characters
1225 s = wxString::FromUTF8( (const char*) utf8Buff.GetData(), utf8Buff.GetDataLen());
1226 if ( s.IsEmpty() ) {
1227 int r = ConvertCharByChar( s, utf8Buff ); // return number of escaped sequences
1228 if ( r > 0 ) {
1229 AddWarning( 0, _T( "The string value contains unrepresentable Unicode characters"));
1230 }
1231 }
1232 #endif
1233 }
1234 }
1235 wxLogTrace( traceMask, _T("(%s) line=%d col=%d"),
1236 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1237 wxLogTrace( traceMask, _T("(%s) string read=%s"),
1238 __PRETTY_FUNCTION__, s.c_str() );
1239 wxLogTrace( traceMask, _T("(%s) value=%s"),
1240 __PRETTY_FUNCTION__, val.AsString().c_str() );
1241
1242 // now assign the string to the JSON-value 'value'
1243 // must check that:
1244 // 'value' is empty
1245 // 'value' is a string; concatenate it but emit warning
1246 if ( !val.IsValid() ) {
1247 wxLogTrace( traceMask, _T("(%s) assigning the string to value"), __PRETTY_FUNCTION__ );
1248 val = s ;
1249 }
1250 else if ( val.IsString() ) {
1251 AddWarning( wxJSONREADER_MULTISTRING,
1252 _T("Multiline strings are not allowed by JSON syntax") );
1253 wxLogTrace( traceMask, _T("(%s) concatenate the string to value"), __PRETTY_FUNCTION__ );
1254 val.Cat( s );
1255 }
1256 else {
1257 AddError( _T( "String value \'%s\' cannot follow another value"), s );
1258 }
1259
1260 // store the input text's line number when the string was stored in 'val'
1261 val.SetLineNo( m_lineNo );
1262
1263 // read the next char after the closing quotes and returns it
1264 if ( ch >= 0 ) {
1265 ch = ReadChar( is );
1266 }
1267 return ch;
1268 }
1269
1270 //! Reads a token string
1271 /*!
1272 This function is called by the ReadValue() when the
1273 first character encontered is not a special char
1274 and it is not a double-quote.
1275 The only possible type is a literal or a number which
1276 all lies in the US-ASCII charset so their UTF-8 encodeing
1277 is the same as US-ASCII.
1278 The function simply reads one byte at a time from the stream
1279 and appends them to a \b wxString object.
1280 Returns the next character read.
1281
1282 A token cannot include \e unicode \e escaped \e sequences
1283 so this function does not try to interpret such sequences.
1284
1285 @param is the input stream
1286 @param ch the character read by DoRead
1287 @param s the string object that contains the token read
1288 @return -1 in case of errors or EOF
1289 */
1290 int
ReadToken(wxInputStream & is,int ch,wxString & s)1291 wxJSONReader::ReadToken( wxInputStream& is, int ch, wxString& s )
1292 {
1293 int nextCh = ch;
1294 while ( nextCh >= 0 ) {
1295 switch ( nextCh ) {
1296 case ' ' :
1297 case ',' :
1298 case ':' :
1299 case '[' :
1300 case ']' :
1301 case '{' :
1302 case '}' :
1303 case '\t' :
1304 case '\n' :
1305 case '\r' :
1306 case '\b' :
1307 wxLogTrace( traceMask, _T("(%s) line=%d col=%d"),
1308 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1309 wxLogTrace( traceMask, _T("(%s) token read=%s"),
1310 __PRETTY_FUNCTION__, s.c_str() );
1311 return nextCh;
1312 break;
1313 default :
1314 s.Append( (unsigned char) nextCh, 1 );
1315 break;
1316 }
1317 // read the next character
1318 nextCh = ReadChar( is );
1319 }
1320 wxLogTrace( traceMask, _T("(%s) EOF on line=%d col=%d"),
1321 __PRETTY_FUNCTION__, m_lineNo, m_colNo );
1322 wxLogTrace( traceMask, _T("(%s) EOF - token read=%s"),
1323 __PRETTY_FUNCTION__, s.c_str() );
1324 return nextCh;
1325 }
1326
1327 //! Read a value from input stream
1328 /*!
1329 The function is called by DoRead() when it enconters a char that is
1330 not a special char nor a double-quote.
1331 It assumes that the string is a numeric value or a literal
1332 boolean value and stores it in the wxJSONValue object \c val.
1333
1334 The function also checks that \c val is of type wxJSONTYPE_INVALID otherwise
1335 an error is reported becasue a value cannot follow another value:
1336 maybe a (,) or (:) is missing.
1337
1338 If the literal starts with a digit, a plus or minus sign, the function
1339 tries to interpret it as a number. The following are tried by the function,
1340 in this order:
1341
1342 \li if the literal starts with a digit: signed integer, then unsigned integer
1343 and finally double conversion is tried
1344 \li if the literal starts with a minus sign: signed integer, then double
1345 conversion is tried
1346 \li if the literal starts with plus sign: unsigned integer
1347 then double conversion is tried
1348
1349 Returns the next character or -1 on EOF.
1350 */
1351 int
ReadValue(wxInputStream & is,int ch,wxJSONValue & val)1352 wxJSONReader::ReadValue( wxInputStream& is, int ch, wxJSONValue& val )
1353 {
1354 wxString s;
1355 int nextCh = ReadToken( is, ch, s );
1356 wxLogTrace( traceMask, _T("(%s) value=%s"),
1357 __PRETTY_FUNCTION__, val.AsString().c_str() );
1358
1359 if ( val.IsValid() ) {
1360 AddError( _T( "Value \'%s\' cannot follow a value: \',\' or \':\' missing?"), s );
1361 return nextCh;
1362 }
1363
1364 // variables used for converting numeric values
1365 bool r; double d;
1366 #if defined( wxJSON_64BIT_INT )
1367 wxInt64 i64;
1368 wxUint64 ui64;
1369 #else
1370 unsigned long int ul; long int l;
1371 #endif
1372
1373 // first try the literal strings lowercase and nocase
1374 if ( s == _T("null") ) {
1375 val.SetType( wxJSONTYPE_NULL );
1376 wxLogTrace( traceMask, _T("(%s) value = NULL"), __PRETTY_FUNCTION__ );
1377 return nextCh;
1378 }
1379 else if ( s.CmpNoCase( _T( "null" )) == 0 ) {
1380 wxLogTrace( traceMask, _T("(%s) value = NULL"), __PRETTY_FUNCTION__ );
1381 AddWarning( wxJSONREADER_CASE, _T( "the \'null\' literal must be lowercase" ));
1382 val.SetType( wxJSONTYPE_NULL );
1383 return nextCh;
1384 }
1385 else if ( s == _T("true") ) {
1386 wxLogTrace( traceMask, _T("(%s) value = TRUE"), __PRETTY_FUNCTION__ );
1387 val = true;
1388 return nextCh;
1389 }
1390 else if ( s.CmpNoCase( _T( "true" )) == 0 ) {
1391 wxLogTrace( traceMask, _T("(%s) value = TRUE"), __PRETTY_FUNCTION__ );
1392 AddWarning( wxJSONREADER_CASE, _T( "the \'true\' literal must be lowercase" ));
1393 val = true;
1394 return nextCh;
1395 }
1396 else if ( s == _T("false") ) {
1397 wxLogTrace( traceMask, _T("(%s) value = FALSE"), __PRETTY_FUNCTION__ );
1398 val = false;
1399 return nextCh;
1400 }
1401 else if ( s.CmpNoCase( _T( "false" )) == 0 ) {
1402 wxLogTrace( traceMask, _T("(%s) value = FALSE"), __PRETTY_FUNCTION__ );
1403 AddWarning( wxJSONREADER_CASE, _T( "the \'false\' literal must be lowercase" ));
1404 val = false;
1405 return nextCh;
1406 }
1407
1408
1409 // try to convert to a number if the token starts with a digit, a plus or a minus
1410 // sign. The function first states what type of conversion are tested:
1411 // 1. first signed integer (not if 'ch' == '+')
1412 // 2. unsigned integer (not if 'ch' == '-')
1413 // 3. finally double
1414 bool tSigned = true, tUnsigned = true, tDouble = true;
1415 switch ( ch ) {
1416 case '0' :
1417 case '1' :
1418 case '2' :
1419 case '3' :
1420 case '4' :
1421 case '5' :
1422 case '6' :
1423 case '7' :
1424 case '8' :
1425 case '9' :
1426 // first try a signed integer, then a unsigned integer, then a double
1427 break;
1428
1429 case '+' :
1430 // the plus sign forces a unsigned integer
1431 tSigned = false;
1432 break;
1433
1434 case '-' :
1435 // try signed and double
1436 tUnsigned = false;
1437 break;
1438 default :
1439 AddError( _T( "Literal \'%s\' is incorrect (did you forget quotes?)"), s );
1440 return nextCh;
1441 }
1442
1443 if ( tSigned ) {
1444 #if defined( wxJSON_64BIT_INT)
1445 r = Strtoll( s, &i64 );
1446 wxLogTrace( traceMask, _T("(%s) convert to wxInt64 result=%d"),
1447 __PRETTY_FUNCTION__, r );
1448 if ( r ) {
1449 // store the value
1450 val = i64;
1451 return nextCh;
1452 }
1453 #else
1454 r = s.ToLong( &l );
1455 wxLogTrace( traceMask, _T("(%s) convert to int result=%d"),
1456 __PRETTY_FUNCTION__, r );
1457 if ( r ) {
1458 // store the value
1459 val = (int) l;
1460 return nextCh;
1461 }
1462 #endif
1463 }
1464
1465 if ( tUnsigned ) {
1466 #if defined( wxJSON_64BIT_INT)
1467 r = Strtoull( s, &ui64 );
1468 wxLogTrace( traceMask, _T("(%s) convert to wxUint64 result=%d"),
1469 __PRETTY_FUNCTION__, r );
1470 if ( r ) {
1471 // store the value
1472 val = ui64;
1473 return nextCh;
1474 }
1475 #else
1476 r = s.ToULong( &ul );
1477 wxLogTrace( traceMask, _T("(%s) convert to int result=%d"),
1478 __PRETTY_FUNCTION__, r );
1479 if ( r ) {
1480 // store the value
1481 val = (unsigned int) ul;
1482 return nextCh;
1483 }
1484 #endif
1485 }
1486
1487 if ( tDouble ) {
1488 r = s.ToDouble( &d );
1489 wxLogTrace( traceMask, _T("(%s) convert to double result=%d"),
1490 __PRETTY_FUNCTION__, r );
1491 if ( r ) {
1492 // store the value
1493 val = d;
1494 return nextCh;
1495 }
1496 }
1497
1498
1499 // the value is not syntactically correct
1500 AddError( _T( "Literal \'%s\' is incorrect (did you forget quotes?)"), s );
1501 return nextCh;
1502 return nextCh;
1503 }
1504
1505
1506 //! Read a 4-hex-digit unicode character.
1507 /*!
1508 The function is called by ReadString() when the \b \\u sequence is
1509 encontered; the sequence introduces a control character in the form:
1510 \code
1511 \uXXXX
1512 \endcode
1513 where XXXX is a four-digit hex code..
1514 The function reads four chars from the input UTF8 stream by calling ReadChar()
1515 four times: if EOF is encontered before reading four chars, -1 is
1516 also returned and no sequence interpretation is performed.
1517 The function stores the 4 hexadecimal digits in the \c uesBuffer parameter.
1518
1519 Returns the character after the hex sequence or -1 if EOF.
1520
1521 \b NOTICE: although the JSON syntax states that only control characters
1522 are represented in this way, the wxJSON library reads and recognizes all
1523 unicode characters in the BMP.
1524 */
1525 int
ReadUES(wxInputStream & is,char * uesBuffer)1526 wxJSONReader::ReadUES( wxInputStream& is, char* uesBuffer )
1527 {
1528 int ch;
1529 for ( int i = 0; i < 4; i++ ) {
1530 ch = ReadChar( is );
1531 if ( ch < 0 ) {
1532 return ch;
1533 }
1534 uesBuffer[i] = (unsigned char) ch;
1535 }
1536 uesBuffer[4] = 0; // makes a ASCIIZ string
1537
1538 return 0;
1539 }
1540
1541
1542 //! The function appends a Unice Escaped Sequence to the temporary UTF8 buffer
1543 /*!
1544 This function is called by \c ReadString() when a \e unicode \e escaped
1545 \e sequence is read from the input text as for example:
1546
1547 \code
1548 \u0001
1549 \endcode
1550
1551 which represents a control character.
1552 The \c uesBuffer parameter contains the 4 hexadecimal digits that are
1553 read from \c ReadUES.
1554
1555 The function tries to convert the 4 hex digits in a \b wchar_t character
1556 which is appended to the memory buffer \c utf8Buff after converting it
1557 to UTF-8.
1558
1559 If the conversion from hexadecimal fails, the function does not
1560 store the character in the UTF-8 buffer and an error is reported.
1561 The function is the same in ANSI and Unicode.
1562 Returns -1 if the buffer does not contain valid hex digits.
1563 sequence. On success returns ZERO.
1564
1565 @param utf8Buff the UTF-8 buffer to which the control char is written
1566 @param uesBuffer the four-hex-digits read from the input text
1567 @return ZERO on success, -1 if the four-hex-digit buffer cannot be converted
1568 */
1569 int
AppendUES(wxMemoryBuffer & utf8Buff,const char * uesBuffer)1570 wxJSONReader::AppendUES( wxMemoryBuffer& utf8Buff, const char* uesBuffer )
1571 {
1572 unsigned long l;
1573 int r = sscanf( uesBuffer, "%lx", &l ); // r is the assigned items
1574 if ( r != 1 ) {
1575 AddError( _T( "Invalid Unicode Escaped Sequence"));
1576 return -1;
1577 }
1578 wxLogTrace( traceMask, _T("(%s) unicode sequence=%s code=%ld"),
1579 __PRETTY_FUNCTION__, uesBuffer, l );
1580
1581 wchar_t ch = (wchar_t) l;
1582 char buffer[16];
1583 size_t len = wxConvUTF8.FromWChar( buffer, 10, &ch, 1 );
1584
1585 // seems that the wxMBConv classes always appends a NULL byte to
1586 // the converted buffer
1587 if ( len > 1 ) {
1588 len = len - 1;
1589 }
1590 utf8Buff.AppendData( buffer, len );
1591
1592 // sould never fail
1593 wxASSERT( len != wxCONV_FAILED );
1594 return 0;
1595 }
1596
1597 //! Store the comment string in the value it refers to.
1598 /*!
1599 The function searches a suitable value object for storing the
1600 comment line that was read by the parser and temporarly
1601 stored in \c m_comment.
1602 The function searches the three values pointed to by:
1603 \li \c m_next
1604 \li \c m_current
1605 \li \c m_lastStored
1606
1607 The value that the comment refers to is:
1608
1609 \li if the comment is on the same line as one of the values, the comment
1610 refer to that value and it is stored as \b inline.
1611 \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_BEFORE, the comment lines
1612 are stored in the value pointed to by \c m_next
1613 \li otherwise, if the comment flag is wxJSONREADER_COMMENTS_AFTER, the comment lines
1614 are stored in the value pointed to by \c m_current or m_latStored
1615
1616 Note that the comment line is only stored if the wxJSONREADER_STORE_COMMENTS
1617 flag was used when the parser object was constructed; otherwise, the
1618 function does nothing and immediatly returns.
1619 Also note that if the comment line has to be stored but the
1620 function cannot find a suitable value to add the comment line to,
1621 an error is reported (note: not a warning but an error).
1622 */
1623 void
StoreComment(const wxJSONValue * parent)1624 wxJSONReader::StoreComment( const wxJSONValue* parent )
1625 {
1626 wxLogTrace( storeTraceMask, _T("(%s) m_comment=%s"), __PRETTY_FUNCTION__, m_comment.c_str());
1627 wxLogTrace( storeTraceMask, _T("(%s) m_flags=%d m_commentLine=%d"),
1628 __PRETTY_FUNCTION__, m_flags, m_commentLine );
1629 wxLogTrace( storeTraceMask, _T("(%s) m_current=%p"), __PRETTY_FUNCTION__, m_current );
1630 wxLogTrace( storeTraceMask, _T("(%s) m_next=%p"), __PRETTY_FUNCTION__, m_next );
1631 wxLogTrace( storeTraceMask, _T("(%s) m_lastStored=%p"), __PRETTY_FUNCTION__, m_lastStored );
1632
1633 // first check if the 'store comment' bit is on
1634 if ( (m_flags & wxJSONREADER_STORE_COMMENTS) == 0 ) {
1635 m_comment.clear();
1636 return;
1637 }
1638
1639 // check if the comment is on the same line of one of the
1640 // 'current', 'next' or 'lastStored' value
1641 if ( m_current != 0 ) {
1642 wxLogTrace( storeTraceMask, _T("(%s) m_current->lineNo=%d"),
1643 __PRETTY_FUNCTION__, m_current->GetLineNo() );
1644 if ( m_current->GetLineNo() == m_commentLine ) {
1645 wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_current\' INLINE"),
1646 __PRETTY_FUNCTION__ );
1647 m_current->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1648 m_comment.clear();
1649 return;
1650 }
1651 }
1652 if ( m_next != 0 ) {
1653 wxLogTrace( storeTraceMask, _T("(%s) m_next->lineNo=%d"),
1654 __PRETTY_FUNCTION__, m_next->GetLineNo() );
1655 if ( m_next->GetLineNo() == m_commentLine ) {
1656 wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_next\' INLINE"),
1657 __PRETTY_FUNCTION__ );
1658 m_next->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1659 m_comment.clear();
1660 return;
1661 }
1662 }
1663 if ( m_lastStored != 0 ) {
1664 wxLogTrace( storeTraceMask, _T("(%s) m_lastStored->lineNo=%d"),
1665 __PRETTY_FUNCTION__, m_lastStored->GetLineNo() );
1666 if ( m_lastStored->GetLineNo() == m_commentLine ) {
1667 wxLogTrace( storeTraceMask, _T("(%s) comment added to \'m_lastStored\' INLINE"),
1668 __PRETTY_FUNCTION__ );
1669 m_lastStored->AddComment( m_comment, wxJSONVALUE_COMMENT_INLINE );
1670 m_comment.clear();
1671 return;
1672 }
1673 }
1674
1675 // if comment is BEFORE, store the comment in the 'm_next'
1676 // or 'm_current' value
1677 // if comment is AFTER, store the comment in the 'm_lastStored'
1678 // or 'm_current' value
1679
1680 if ( m_flags & wxJSONREADER_COMMENTS_AFTER ) { // comment AFTER
1681 if ( m_current ) {
1682 if ( m_current == parent || !m_current->IsValid()) {
1683 AddError( _T("Cannot find a value for storing the comment (flag AFTER)"));
1684 }
1685 else {
1686 wxLogTrace( storeTraceMask, _T("(%s) comment added to m_current (AFTER)"),
1687 __PRETTY_FUNCTION__ );
1688 m_current->AddComment( m_comment, wxJSONVALUE_COMMENT_AFTER );
1689 }
1690 }
1691 else if ( m_lastStored ) {
1692 wxLogTrace( storeTraceMask, _T("(%s) comment added to m_lastStored (AFTER)"),
1693 __PRETTY_FUNCTION__ );
1694 m_lastStored->AddComment( m_comment, wxJSONVALUE_COMMENT_AFTER );
1695 }
1696 else {
1697 wxLogTrace( storeTraceMask,
1698 _T("(%s) cannot find a value for storing the AFTER comment"), __PRETTY_FUNCTION__ );
1699 AddError(_T("Cannot find a value for storing the comment (flag AFTER)"));
1700 }
1701 }
1702 else { // comment BEFORE can only be added to the 'next' value
1703 if ( m_next ) {
1704 wxLogTrace( storeTraceMask, _T("(%s) comment added to m_next (BEFORE)"),
1705 __PRETTY_FUNCTION__ );
1706 m_next->AddComment( m_comment, wxJSONVALUE_COMMENT_BEFORE );
1707 }
1708 else {
1709 // cannot find a value for storing the comment
1710 AddError(_T("Cannot find a value for storing the comment (flag BEFORE)"));
1711 }
1712 }
1713 m_comment.clear();
1714 }
1715
1716
1717 //! Return the number of bytes that make a character in stream input
1718 /*!
1719 This function returns the number of bytes that represent a unicode
1720 code point in various encoding.
1721 For example, if the input stream is UTF-32 the function returns 4.
1722 Because the only recognized format for streams is UTF-8 the function
1723 just calls UTF8NumBytes() and returns.
1724 The function is, actually, not used at all.
1725
1726 */
1727 int
NumBytes(char ch)1728 wxJSONReader::NumBytes( char ch )
1729 {
1730 int n = UTF8NumBytes( ch );
1731 return n;
1732 }
1733
1734 //! Compute the number of bytes that makes a UTF-8 encoded wide character.
1735 /*!
1736 The function counts the number of '1' bit in the character \c ch and
1737 returns it.
1738 The UTF-8 encoding specifies the number of bytes needed by a wide character
1739 by coding it in the first byte. See below.
1740
1741 Note that if the character does not contain a valid UTF-8 encoding
1742 the function returns -1.
1743
1744 \code
1745 UCS-4 range (hex.) UTF-8 octet sequence (binary)
1746 ------------------- -----------------------------
1747 0000 0000-0000 007F 0xxxxxxx
1748 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1749 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1750 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1751 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1752 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
1753 \endcode
1754 */
1755 int
UTF8NumBytes(char ch)1756 wxJSONReader::UTF8NumBytes( char ch )
1757 {
1758 int num = 0; // the counter of '1' bits
1759 for ( int i = 0; i < 8; i++ ) {
1760 if ( (ch & 0x80) == 0 ) {
1761 break;
1762 }
1763 ++num;
1764 ch = ch << 1;
1765 }
1766
1767 // note that if the char contains more than six '1' bits it is not
1768 // a valid UTF-8 encoded character
1769 if ( num > 6 ) {
1770 num = -1;
1771 }
1772 else if ( num == 0 ) {
1773 num = 1;
1774 }
1775 return num;
1776 }
1777
1778 //! Convert a UTF-8 memory buffer one char at a time
1779 /*!
1780 This function is used in ANSI mode when input from a stream is in UTF-8
1781 format and the UTF-8 buffer read cannot be converted to the locale
1782 wxString object.
1783 The function performs a char-by-char conversion of the buffer and appends
1784 every representable character to the string \c s.
1785 Characters that cannot be represented are stored as \e unicode \e escaped
1786 \e sequences in the form:
1787 \code
1788 \uXXXX
1789 \endcode
1790 where XXXX is a for-hex-digits Unicode code point.
1791 The function returns the number of characters that cannot be represented
1792 in the current locale.
1793 */
1794 int
ConvertCharByChar(wxString & s,const wxMemoryBuffer & utf8Buffer)1795 wxJSONReader::ConvertCharByChar( wxString& s, const wxMemoryBuffer& utf8Buffer )
1796 {
1797 size_t len = utf8Buffer.GetDataLen();
1798 char* buff = (char*) utf8Buffer.GetData();
1799 char* buffEnd = buff + len;
1800
1801 int result = 0;
1802 char temp[16]; // the UTF-8 code-point
1803
1804 while ( buff < buffEnd ) {
1805 temp[0] = *buff; // the first UTF-8 code-unit
1806 // compute the number of code-untis that make one UTF-8 code-point
1807 int numBytes = NumBytes( *buff );
1808 ++buff;
1809 for ( int i = 1; i < numBytes; i++ ) {
1810 if ( buff >= buffEnd ) {
1811 break;
1812 }
1813 temp[i] = *buff; // the first UTF-8 code-unit
1814 ++buff;
1815 }
1816 //if ( buff >= buffEnd ) {
1817 // break;
1818 //}
1819 // now convert 'temp' to a wide-character
1820 wchar_t dst[10];
1821 size_t outLength = wxConvUTF8.ToWChar( dst, 10, temp, numBytes );
1822
1823 // now convert the wide char to a locale dependent character
1824 // len = wxConvLocal.FromWChar( temp, 16, dst, outLength );
1825 // len = wxConviso8859_1.FromWChar( temp, 16, dst, outLength );
1826 len = wxConvLibc.FromWChar( temp, 16, dst, outLength );
1827 if ( len == wxCONV_FAILED ) {
1828 ++result;
1829 wxString t;
1830 t.Printf( _T( "\\u%04X"), (int) dst[0] );
1831 s.Append( t );
1832 }
1833 else {
1834 s.Append( temp[0], 1 );
1835 }
1836 } // end while
1837 return result;
1838 }
1839
1840 //! Read a memory buffer type
1841 /*!
1842 This function is called by DoRead() when the single-quote character is
1843 encountered which starts a \e memory \e buffer type.
1844 This type is a \b wxJSON extension so the function emits a warning
1845 when such a type is encountered.
1846 If the reader is constructed without the \c wxJSONREADER_MEMORYBUFF flag
1847 then the warning becomes an error.
1848 To know more about this JSON syntax extension read \ref wxjson_tutorial_memorybuff
1849
1850 @param is the input stream
1851 @param val the JSON value that will hold the memory buffer value
1852 @return the last char read or -1 in case of EOF
1853 */
1854
// Two unsigned bytes ('cu') and a short integer ('bu') that share the
// same storage.
// NOTE(review): the union is not referenced by the code that follows it
// in this part of the file - confirm where it is used before changing it.
union byte_union
{
    unsigned char cu[2];
    short bu;
};
1860
1861 int
ReadMemoryBuff(wxInputStream & is,wxJSONValue & val)1862 wxJSONReader::ReadMemoryBuff( wxInputStream& is, wxJSONValue& val )
1863 {
1864 static const wxChar* membuffError = _T("the \'memory buffer\' type contains %d invalid digits" );
1865
1866 AddWarning( wxJSONREADER_MEMORYBUFF, _T( "the \'memory buffer\' type is not valid JSON text" ));
1867
1868 wxMemoryBuffer buff;
1869 int ch = 0; int errors = 0;
1870 unsigned char byte = 0;
1871 while ( ch >= 0 ) {
1872 ch = ReadChar( is );
1873 if ( ch < 0 ) {
1874 break;
1875 }
1876 if ( ch == '\'' ) {
1877 break;
1878 }
1879 // the conversion is done two chars at a time
1880 unsigned char c1 = (unsigned char) ch;
1881 ch = ReadChar( is );
1882 if ( ch < 0 ) {
1883 break;
1884 }
1885 unsigned char c2 = (unsigned char) ch;
1886 c1 -= '0';
1887 c2 -= '0';
1888 if ( c1 > 9 ) {
1889 c1 -= 7;
1890 }
1891 if ( c2 > 9 ) {
1892 c2 -= 7;
1893 }
1894 if ( c1 > 15 ) {
1895 ++errors;
1896 }
1897 else if ( c2 > 15 ) {
1898 ++errors;
1899 }
1900 else {
1901 byte = (c1 * 16) + c2;
1902 buff.AppendByte( byte );
1903 }
1904 } // end while
1905
1906 if ( errors > 0 ) {
1907 wxString err;
1908 err.Printf( membuffError, errors );
1909 AddError( err );
1910 }
1911
1912
1913 // now assign the memory buffer object to the JSON-value 'value'
1914 // must check that:
1915 // 'value' is invalid OR
1916 // 'value' is a memory buffer; concatenate it
1917 if ( !val.IsValid() ) {
1918 wxLogTrace( traceMask, _T("(%s) assigning the memory buffer to value"), __PRETTY_FUNCTION__ );
1919 val = buff ;
1920 }
1921 else if ( val.IsMemoryBuff() ) {
1922 wxLogTrace( traceMask, _T("(%s) concatenate memory buffer to value"), __PRETTY_FUNCTION__ );
1923 val.Cat( buff );
1924 }
1925 else {
1926 AddError( _T( "Memory buffer value cannot follow another value") );
1927 }
1928
1929 // store the input text's line number when the string was stored in 'val'
1930 val.SetLineNo( m_lineNo );
1931
1932 // read the next char after the closing quotes and returns it
1933 if ( ch >= 0 ) {
1934 ch = ReadChar( is );
1935 }
1936 return ch;
1937 }
1938
1939
1940
1941
1942 #if defined( wxJSON_64BIT_INT )
1943 //! Converts a decimal string to a 64-bit signed integer
1944 /*!
1945 This function implements a simple variant
1946 of the \b strtoll C-library function.
1947 I needed this implementation because the wxString::To(U)LongLong
1948 function does not work on my system:
1949
1950 \li GNU/Linux Fedora Core 6
1951 \li GCC version 4.1.1
1952 \li libc.so.6
1953
1954 The wxWidgets library (actually I have installed version 2.8.7)
1955 relies on \b strtoll in order to do the conversion from a string
1956 to a long long integer but, in fact, it does not work because
1957 the 'wxHAS_STRTOLL' macro is not defined on my system.
1958 The problem only affects the Unicode builds while it seems
1959 that the wxString::To(U)LongLong function works in ANSI builds.
1960
1961 Note that this implementation is not a complete substitute of the
1962 strtoll function because it only converts decimal strings (only base
1963 10 is implemented).
1964
1965 @param str the string that contains the decimal literal
1966 @param i64 the pointer to long long which holds the converted value
1967
1968 @return TRUE if the conversion succeeds
1969 */
1970 bool
Strtoll(const wxString & str,wxInt64 * i64)1971 wxJSONReader::Strtoll( const wxString& str, wxInt64* i64 )
1972 {
1973 wxChar sign = ' ';
1974 wxUint64 ui64;
1975 bool r = DoStrto_ll( str, &ui64, &sign );
1976
1977 if ( r) {
1978 // check overflow for signed long long
1979 switch ( sign ) {
1980 case '-' :
1981 if ( ui64 > (wxUint64) LLONG_MAX + 1 ) {
1982 r = false;
1983 }
1984 else {
1985 *i64 = (wxInt64) (ui64 * -1);
1986 }
1987 break;
1988
1989 // case '+' :
1990 default :
1991 if ( ui64 > LLONG_MAX ) {
1992 r = false;
1993 }
1994 else {
1995 *i64 = (wxInt64) ui64;
1996 }
1997 break;
1998 }
1999 }
2000 return r;
2001 }
2002
2003
2004 //! Converts a decimal string to a 64-bit unsigned integer.
2005 /*!
2006 Similar to \c Strtoll but for unsigned integers
2007 */
2008 bool
Strtoull(const wxString & str,wxUint64 * ui64)2009 wxJSONReader::Strtoull( const wxString& str, wxUint64* ui64 )
2010 {
2011 wxChar sign = ' ';
2012 bool r = DoStrto_ll( str, ui64, &sign );
2013 if ( sign == '-' ) {
2014 r = false;
2015 }
2016 return r;
2017 }
2018
2019 //! Perform the actual conversion from a string to a 64-bit integer
2020 /*!
2021 This function is called internally by the \c Strtoll and \c Strtoull functions
2022 and it does the actual conversion.
2023 The function is also able to check numeric overflow.
2024
2025 @param str the string that has to be converted
2026 @param ui64 the pointer to a unsigned long long that holds the converted value
2027 @param sign the pointer to a wxChar character that will get the sign of the literal string, if any
2028 @return TRUE if the conversion succeeds
2029 */
2030 bool
DoStrto_ll(const wxString & str,wxUint64 * ui64,wxChar * sign)2031 wxJSONReader::DoStrto_ll( const wxString& str, wxUint64* ui64, wxChar* sign )
2032 {
2033 // the conversion is done by multiplying the individual digits
2034 // in reverse order to the corresponding power of 10
2035 //
2036 // 10's power: 987654321.9876543210
2037 //
2038 // LLONG_MAX: 9223372036854775807
2039 // LLONG_MIN: -9223372036854775808
2040 // ULLONG_MAX: 18446744073709551615
2041 //
2042 // the function does not take into account the sign: only a
2043 // unsigned long long int is returned
2044
2045 int maxDigits = 20; // 20 + 1 (for the sign)
2046
2047 wxUint64 power10[] = {
2048 wxULL(1),
2049 wxULL(10),
2050 wxULL(100),
2051 wxULL(1000),
2052 wxULL(10000),
2053 wxULL(100000),
2054 wxULL(1000000),
2055 wxULL(10000000),
2056 wxULL(100000000),
2057 wxULL(1000000000),
2058 wxULL(10000000000),
2059 wxULL(100000000000),
2060 wxULL(1000000000000),
2061 wxULL(10000000000000),
2062 wxULL(100000000000000),
2063 wxULL(1000000000000000),
2064 wxULL(10000000000000000),
2065 wxULL(100000000000000000),
2066 wxULL(1000000000000000000),
2067 wxULL(10000000000000000000)
2068 };
2069
2070
2071 wxUint64 temp1 = wxULL(0); // the temporary converted integer
2072
2073 int strLen = str.length();
2074 if ( strLen == 0 ) {
2075 // an empty string is converted to a ZERO value: the function succeeds
2076 *ui64 = wxLL(0);
2077 return true;
2078 }
2079
2080 int index = 0;
2081 wxChar ch = str[0];
2082 if ( ch == '+' || ch == '-' ) {
2083 *sign = ch;
2084 ++index;
2085 ++maxDigits;
2086 }
2087
2088 if ( strLen > maxDigits ) {
2089 return false;
2090 }
2091
2092 // check the overflow: check the string length and the individual digits
2093 // of the string; the overflow is checked for unsigned long long
2094 if ( strLen == maxDigits ) {
2095 wxString uLongMax( _T("18446744073709551615"));
2096 int j = 0;
2097 for ( int i = index; i < strLen - 1; i++ ) {
2098 ch = str[i];
2099 if ( ch < '0' || ch > '9' ) {
2100 return false;
2101 }
2102 if ( ch > uLongMax[j] ) {
2103 return false;
2104 }
2105 if ( ch < uLongMax[j] ) {
2106 break;
2107 }
2108 ++j;
2109 }
2110 }
2111
2112 // get the digits in the reverse order and multiply them by the
2113 // corresponding power of 10
2114 int exponent = 0;
2115 for ( int i = strLen - 1; i >= index; i-- ) {
2116 wxChar ch = str[i];
2117 if ( ch < '0' || ch > '9' ) {
2118 return false;
2119 }
2120 ch = ch - '0';
2121 // compute the new temporary value
2122 temp1 += ch * power10[exponent];
2123 ++exponent;
2124 }
2125 *ui64 = temp1;
2126 return true;
2127 }
2128
2129 #endif // defined( wxJSON_64BIT_INT )
2130
2131 /*
2132 {
2133 }
2134 */
2135
2136
2137
2138