1 /**
2 * YAML parser and emitter PHP extension
3 *
4 * Copyright (c) 2007 Ryusuke SEKIYAMA. All rights reserved.
5 * Copyright (c) 2009 Keynetics Inc. All rights reserved.
6 * Copyright (c) 2015 Bryan Davis and contributors. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 *
26 * @package php_yaml
27 * @author Ryusuke SEKIYAMA <rsky0711@gmail.com>
28 * @author Bryan Davis <bd808@bd808.com>
29 * @copyright 2007 Ryusuke SEKIYAMA
30 * @copyright 2009 Keynetics Inc
31 * @copyright 2015 Bryan Davis and contributors
32 * @license http://www.opensource.org/licenses/mit-license.php MIT License
33 */
34
35
36 #include "php_yaml.h"
37 #include "php_yaml_int.h"
38
39
/* {{{ local macros
 */

/*
 * Both macros work on two names taken from the scope in which they are
 * expanded: a character cursor `ptr` that is advanced in place and a limit
 * `end` that the cursor never passes.
 *
 * The bodies are wrapped in do { } while (0) so that `ts_skip_space();`
 * expands to exactly one statement and stays valid as the unbraced body of
 * an if/else.
 */

/* advance `ptr` over a run of spaces and tabs */
#define ts_skip_space() \
	do { \
		while (ptr < end && (*ptr == ' ' || *ptr == '\t')) { \
			ptr++; \
		} \
	} while (0)

/* advance `ptr` over a run of ASCII decimal digits */
#define ts_skip_number() \
	do { \
		while (ptr < end && *ptr >= '0' && *ptr <= '9') { \
			ptr++; \
		} \
	} while (0)

/* }}} */
53
54
55 /* {{{ local prototypes
56 */
57 static zend_long eval_sexagesimal_l(zend_long lval, const char *sg, const char *eos);
58
59 static double eval_sexagesimal_d(double dval, const char *sg, const char *eos);
60
61 /* }}} */
62
63
64 /* {{{ detect_scalar_type(const char *, size_t, yaml_event_t)
65 * Guess what datatype the scalar encodes
66 */
detect_scalar_type(const char * value,size_t length,const yaml_event_t * event)67 const char *detect_scalar_type(const char *value, size_t length,
68 const yaml_event_t *event)
69 {
70 int flags = 0;
71 zend_long lval = 0;
72 double dval = 0.0;
73
74 /* is value a null? */
75 if (0 == length || scalar_is_null(value, length, event)) {
76 return YAML_NULL_TAG;
77 }
78
79 /* is value numeric? */
80 flags = scalar_is_numeric(value, length, &lval, &dval, NULL);
81 if (flags != Y_SCALAR_IS_NOT_NUMERIC) {
82 return (flags & Y_SCALAR_IS_FLOAT) ? YAML_FLOAT_TAG : YAML_INT_TAG;
83 }
84
85 /* is value boolean? */
86 flags = scalar_is_bool(value, length, event);
87 if (-1 != flags) {
88 return YAML_BOOL_TAG;
89 }
90
91 /* is value a timestamp? */
92 if (scalar_is_timestamp(value, length)) {
93 return YAML_TIMESTAMP_TAG;
94 }
95
96 /* no guess */
97 return NULL;
98 }
99 /* }}} */
100
101
102 /* {{{ scalar_is_null(const char *,size_t,yaml_event_t)
103 * Does this scalar encode a NULL value?
104 *
105 * specification is found at http://yaml.org/type/null.html.
106 */
107 int
scalar_is_null(const char * value,size_t length,const yaml_event_t * event)108 scalar_is_null(const char *value, size_t length, const yaml_event_t *event)
109 {
110 if (NULL != event && event->data.scalar.quoted_implicit) {
111 return 0;
112 }
113
114 if (NULL == event || event->data.scalar.plain_implicit) {
115 if (length == 0 ||
116 (length == 1 && *value == '~') ||
117 STR_EQ("NULL", value) ||
118 STR_EQ("Null", value) ||
119 STR_EQ("null", value)) {
120 return 1;
121 }
122
123 } else if (NULL != event && SCALAR_TAG_IS((*event), YAML_NULL_TAG)) {
124 return 1;
125 }
126
127 return 0;
128 }
129 /* }}} */
130
131
132 /* {{{ scalar_is_bool(const char *,size_t,yaml_event_t)
133 * Does this scalar encode a BOOL value?
134 *
135 * specification is found at http://yaml.org/type/bool.html.
136 */
137 int
scalar_is_bool(const char * value,size_t length,const yaml_event_t * event)138 scalar_is_bool(const char *value, size_t length, const yaml_event_t *event)
139 {
140 /* TODO: add ini setting to turn 'y'/'n' checks on/off */
141 if (NULL == event || IS_NOT_QUOTED_OR_TAG_IS((*event), YAML_BOOL_TAG)) {
142 if ((length == 1 && (*value == 'Y' || *value == 'y')) ||
143 STR_EQ("YES", value) ||
144 STR_EQ("Yes", value) ||
145 STR_EQ("yes", value) ||
146 STR_EQ("TRUE", value) ||
147 STR_EQ("True", value) ||
148 STR_EQ("true", value) ||
149 STR_EQ("ON", value) ||
150 STR_EQ("On", value) ||
151 STR_EQ("on", value)) {
152 return 1;
153 }
154
155 if ((length == 1 && (*value == 'N' || *value == 'n')) ||
156 STR_EQ("NO", value) ||
157 STR_EQ("No", value) ||
158 STR_EQ("no", value) ||
159 STR_EQ("FALSE", value) ||
160 STR_EQ("False", value) ||
161 STR_EQ("false", value) ||
162 STR_EQ("OFF", value) ||
163 STR_EQ("Off", value) ||
164 STR_EQ("off", value)) {
165 return 0;
166 }
167
168 } else if (NULL != event &&
169 IS_NOT_IMPLICIT_AND_TAG_IS((*event), YAML_BOOL_TAG)) {
170 if (0 == length || (1 == length && '0' == *value)) {
171 return 0;
172 } else {
173 return 1;
174 }
175 }
176
177 return -1;
178 }
179 /* }}} */
180
181
182 /* {{{ scalar_is_numeric()
183 * Does this scalar encode a NUMERIC value?
184 *
185 * specification is found at http://yaml.org/type/float.html.
186 * specification is found at http://yaml.org/type/int.html.
187 */
188 int
scalar_is_numeric(const char * value,size_t length,zend_long * lval,double * dval,char ** str)189 scalar_is_numeric(const char *value, size_t length, zend_long *lval,
190 double *dval, char **str)
191 {
192 const char *end = value + length;
193 char *buf = { 0 }, *ptr = { 0 };
194 int negative = 0;
195 int type = 0;
196
197 if (0 == length) {
198 goto not_numeric;
199 }
200
201 /* trim */
202 while (value < end && (*(end - 1) == ' ' || *(end - 1) == '\t')) {
203 end--;
204 }
205
206 while (value < end && (*value == ' ' || *value == '\t')) {
207 value++;
208 }
209
210 if (value == end) {
211 goto not_numeric;
212 }
213
214 /* not a number */
215 if (STR_EQ(".NAN", value) ||
216 STR_EQ(".NaN", value) ||
217 STR_EQ(".nan", value)) {
218 type = Y_SCALAR_IS_FLOAT | Y_SCALAR_IS_NAN;
219 goto finish;
220 }
221
222 /* catch the degenerate case of `.` as input */
223 if (STR_EQ(".", value)) {
224 goto not_numeric;
225 }
226
227 /* sign */
228 if (*value == '+') {
229 value++;
230
231 } else if (*value == '-') {
232 negative = 1;
233 value++;
234 }
235
236 if (value == end) {
237 goto not_numeric;
238 }
239
240 /* infinity */
241 if (STR_EQ(".INF", value) ||
242 STR_EQ(".Inf", value) ||
243 STR_EQ(".inf", value)) {
244 type = Y_SCALAR_IS_FLOAT;
245 type |= (negative ? Y_SCALAR_IS_INFINITY_N : Y_SCALAR_IS_INFINITY_P);
246 goto finish;
247 }
248
249 /* alloc */
250 buf = (char *) emalloc(length + 3);
251 ptr = buf;
252 if (negative) {
253 *ptr++ = '-';
254 }
255
256 /* parse */
257 if (*value == '0') {
258 *ptr++ = *value++;
259 if (value == end) {
260 goto return_zero;
261 }
262
263 if (*value == 'b') {
264 /* binary integer */
265 *ptr++ = *value++;
266 if (value == end) {
267 goto not_numeric;
268 }
269
270 while (value < end && (*value == '_' || *value == '0')) {
271 value++;
272 }
273
274 if (value == end) {
275 goto return_zero;
276 }
277
278 /* check the sequence */
279 while (value < end) {
280 if (*value == '_') {
281 value++;
282
283 } else if (*value == '0' || *value == '1') {
284 *ptr++ = *value++;
285
286 } else {
287 goto not_numeric;
288 }
289 }
290
291 type = Y_SCALAR_IS_INT | Y_SCALAR_IS_BINARY;
292
293 } else if (*value == 'x') {
294 /* hexadecimal integer */
295 *ptr++ = *value++;
296
297 if (value == end) {
298 goto not_numeric;
299 }
300
301 while (value < end && (*value == '_' || *value == '0')) {
302 value++;
303 }
304
305 if (value == end) {
306 goto return_zero;
307 }
308
309 /* check the sequence */
310 while (value < end) {
311 if (*value == '_') {
312 value++;
313
314 } else if ((*value >= '0' && *value <= '9') ||
315 (*value >= 'A' && *value <= 'F') ||
316 (*value >= 'a' && *value <= 'f')) {
317 *ptr++ = *value++;
318
319 } else {
320 goto not_numeric;
321 }
322 }
323
324 type = Y_SCALAR_IS_INT | Y_SCALAR_IS_HEXADECIMAL;
325
326 } else if (*value == '_' || (*value >= '0' && *value <= '7')) {
327 /* octal integer */
328 while (value < end) {
329 if (*value == '_') {
330 value++;
331
332 } else if (*value >= '0' && *value <= '7') {
333 *ptr++ = *value++;
334
335 } else {
336 goto not_numeric;
337 }
338 }
339
340 type = Y_SCALAR_IS_INT | Y_SCALAR_IS_OCTAL;
341
342 } else if (*value == '.') {
343 goto check_float;
344
345 } else if (*value == ':') {
346 goto check_sexa;
347
348 } else {
349 goto not_numeric;
350 }
351
352 } else if (*value >= '1' && *value <= '9') {
353 /* integer */
354 *ptr++ = *value++;
355 while (value < end) {
356 if (*value == '_' || *value == ',') {
357 value++;
358
359 } else if (*value >= '0' && *value <= '9') {
360 *ptr++ = *value++;
361
362 } else if (*value == ':') {
363 goto check_sexa;
364
365 } else if (*value == '.') {
366 goto check_float;
367
368 } else {
369 goto not_numeric;
370
371 }
372 }
373
374 type = Y_SCALAR_IS_INT | Y_SCALAR_IS_DECIMAL;
375
376 } else if (*value == ':') {
377 /* sexagecimal */
378
379 check_sexa:
380 while (value < end) {
381 if (*value == '.') {
382 type = Y_SCALAR_IS_FLOAT | Y_SCALAR_IS_SEXAGECIMAL;
383 goto check_float;
384 }
385
386 if (*value != ':') {
387 goto not_numeric;
388 }
389
390 *ptr++ = *value++;
391 if (*(value + 1) == ':' || *(value + 1) == '.' ||
392 (value + 1) == end) {
393 if (*value >= '0' && *value <= '9') {
394 *ptr++ = *value++;
395
396 } else {
397 goto not_numeric;
398 }
399
400 } else {
401 if ((*value >= '0' && *value <= '5') &&
402 (*(value + 1) >= '0' && *(value + 1) <= '9')) {
403 *ptr++ = *value++;
404 *ptr++ = *value++;
405
406 } else {
407 goto not_numeric;
408 }
409 }
410 }
411
412 if (*value == '.') {
413 type = Y_SCALAR_IS_FLOAT | Y_SCALAR_IS_SEXAGECIMAL;
414 goto check_float;
415
416 } else if (value == end) {
417 type = Y_SCALAR_IS_INT | Y_SCALAR_IS_SEXAGECIMAL;
418
419 } else {
420 goto not_numeric;
421 }
422
423 } else if (*value == '.') {
424 /* float */
425 *ptr++ = '0';
426
427 check_float:
428 *ptr++ = *value++;
429
430 if (type == (Y_SCALAR_IS_FLOAT | Y_SCALAR_IS_SEXAGECIMAL)) {
431 /* sexagecimal float */
432 while (value < end && (*(end - 1) == '_' || *(end - 1) == '0')) {
433 end--;
434 }
435
436 if (value == end) {
437 *ptr++ = '0';
438 }
439
440 while (value < end) {
441 if (*value == '_') {
442 value++;
443
444 } else if (*value >= '0' && *value <= '9') {
445 *ptr++ = *value++;
446
447 } else {
448 goto not_numeric;
449 }
450 }
451
452 } else {
453 /* decimal float */
454 int is_exp = 0;
455 while (value < end) {
456 if (*value == '_') {
457 value++;
458
459 } else if (*value >= '0' && *value <= '9') {
460 *ptr++ = *value++;
461
462 } else if (*value == 'E' || *value == 'e') {
463 /* exponential */
464 is_exp = 1;
465
466 *ptr++ = *value++;
467 if (value == end || (*value != '+' && *value != '-')) {
468 goto not_numeric;
469 }
470
471 *ptr++ = *value++;
472 if (value == end || *value < '0' || *value > '9' ||
473 (*value == '0' && value + 1 == end)) {
474 goto not_numeric;
475 }
476
477 *ptr++ = *value++;
478 while (value < end) {
479 if (*value >= '0' && *value <= '9') {
480 *ptr++ = *value++;
481
482 } else {
483 goto not_numeric;
484 }
485 }
486
487 } else {
488 goto not_numeric;
489 }
490 }
491
492 /* trim */
493 if (!is_exp) {
494 while (*(ptr - 1) == '0') {
495 ptr--;
496 }
497
498 if (*(ptr - 1) == '.') {
499 *ptr++ = '0';
500 }
501 }
502
503 type = Y_SCALAR_IS_FLOAT | Y_SCALAR_IS_DECIMAL;
504 }
505
506 } else {
507 goto not_numeric;
508 }
509
510 /* terminate */
511 *ptr = '\0';
512
513 finish:
514 /* convert & assign */
515 if ((type & Y_SCALAR_IS_INT) && lval != NULL) {
516 switch (type & Y_SCALAR_FORMAT_MASK) {
517 case Y_SCALAR_IS_BINARY:
518 ptr = buf + 2;
519 if (*ptr == 'b') {
520 ptr++;
521 }
522
523 *lval = ZEND_STRTOL(ptr, (char **) NULL, 2);
524 if (*buf == '-') {
525 *lval *= -1L;
526 }
527 break;
528
529 case Y_SCALAR_IS_OCTAL:
530 *lval = ZEND_STRTOL(buf, (char **) NULL, 8);
531 break;
532
533 case Y_SCALAR_IS_HEXADECIMAL:
534 *lval = ZEND_STRTOL(buf, (char **) NULL, 16);
535 break;
536
537 case Y_SCALAR_IS_SEXAGECIMAL:
538 *lval = eval_sexagesimal_l(0, buf, ptr);
539 if (*buf == '-') {
540 *lval *= -1L;
541 }
542 break;
543
544 default:
545 #if PHP_VERSION_ID < 80100
546 ZEND_ATOL(*lval, buf);
547 #else
548 *lval = ZEND_ATOL(buf);
549 #endif
550 break;
551 }
552
553 } else if ((type & Y_SCALAR_IS_FLOAT) && dval != NULL) {
554 switch (type & Y_SCALAR_FORMAT_MASK) {
555 case Y_SCALAR_IS_SEXAGECIMAL:
556 *dval = eval_sexagesimal_d(0.0, buf, ptr);
557 if (*buf == '-') {
558 *dval *= -1.0;
559 }
560 break;
561
562 case Y_SCALAR_IS_INFINITY_P:
563 *dval = php_get_inf();
564 break;
565
566 case Y_SCALAR_IS_INFINITY_N:
567 *dval = -php_get_inf();
568 break;
569
570 case Y_SCALAR_IS_NAN:
571 *dval = php_get_nan();
572 break;
573
574 default:
575 *dval = zend_strtod(buf, (const char **) NULL);
576 break;
577 }
578 }
579
580 if (buf != NULL) {
581 if (str != NULL) {
582 *str = buf;
583
584 } else {
585 efree(buf);
586 }
587 }
588
589 /* return */
590 return type;
591
592
593 return_zero:
594 if (lval != NULL) {
595 *lval = 0;
596 }
597
598 if (dval != NULL) {
599 *dval = 0.0;
600 }
601
602 if (buf != NULL) {
603 efree(buf);
604 }
605
606 return (Y_SCALAR_IS_INT | Y_SCALAR_IS_ZERO);
607
608
609 not_numeric:
610 if (buf != NULL) {
611 efree(buf);
612 }
613
614 return Y_SCALAR_IS_NOT_NUMERIC;
615 }
616 /* }}} */
617
618
/* {{{ scalar_is_timestamp(const char *,size_t)
 * Does this scalar encode a TIMESTAMP value?
 *
 * specification is found at http://yaml.org/type/timestamp.html.
 *
 * Only the shape of the text is checked (digit counts and separators);
 * field values are not range checked. Returns 1 when the `length` bytes at
 * `value` look like a timestamp and 0 otherwise, including when `value`
 * is NULL or `length` is 0.
 */

/* return the first position in [ptr, end) that is not a space or tab */
static const char *ts_span_blank(const char *ptr, const char *end)
{
	while (ptr < end && (*ptr == ' ' || *ptr == '\t')) {
		ptr++;
	}

	return ptr;
}

/* return the first position in [ptr, end) that is not an ASCII digit */
static const char *ts_span_digits(const char *ptr, const char *end)
{
	while (ptr < end && *ptr >= '0' && *ptr <= '9') {
		ptr++;
	}

	return ptr;
}

int scalar_is_timestamp(const char *value, size_t length)
{
	const char *ptr = value;
	const char *end;
	const char *date;	/* first character of the year */
	const char *field;	/* first character of the field being checked */

	if (ptr == NULL || 0 == length) {
		return 0;
	}

	end = value + length;

	/* skip leading space */
	ptr = ts_span_blank(ptr, end);

	/* check 4 digit year and separator */
	date = ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr == end || ptr - date != 4 || *ptr != '-') {
		return 0;
	}

	/* check 1 or 2 digit month and separator */
	field = ++ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr == field || ptr == end || ptr - field > 2 || *ptr != '-') {
		return 0;
	}

	/* check 1 or 2 digit day */
	field = ++ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr == field || ptr - field > 2) {
		return 0;
	}

	if (ptr == end) {
		/* date only format requires YYYY-MM-DD */
		return (ptr - date == 10) ? 1 : 0;
	}

	/* time separator is T or whitespace */
	if (*ptr == 'T' || *ptr == 't') {
		ptr++;

	} else if (*ptr == ' ' || *ptr == '\t') {
		ptr = ts_span_blank(ptr, end);

	} else {
		return 0;
	}

	/* check 1 or 2 digit hour and separator */
	field = ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr == field || ptr == end || ptr - field > 2 || *ptr != ':') {
		return 0;
	}

	/* check 2 digit minute and separator */
	field = ++ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr == end || ptr - field != 2 || *ptr != ':') {
		return 0;
	}

	/*
	 * check 2 digit second; the width is enforced whether or not anything
	 * follows, so `21:59:4Z` and `21:59:.5` are rejected just like `21:59:4`
	 */
	field = ++ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr - field != 2) {
		return 0;
	}

	if (ptr == end) {
		return 1;
	}

	/* check optional fraction; any number of digits, including none */
	if (*ptr == '.') {
		ptr = ts_span_digits(ptr + 1, end);
	}

	/* skip optional separator space */
	ptr = ts_span_blank(ptr, end);
	if (ptr == end) {
		return 1;
	}

	/* check time zone */
	if (*ptr == 'Z') {
		ptr = ts_span_blank(ptr + 1, end);
		return (ptr == end) ? 1 : 0;
	}

	/* check time zone offset sign */
	if (*ptr != '+' && *ptr != '-') {
		return 0;
	}

	/* check time zone hour: runs of 1, 2 or 4 digits are accepted */
	field = ++ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr == field || ptr - field == 3 || ptr - field > 4) {
		return 0;
	}

	if (ptr == end) {
		return 1;
	}

	/* optional time zone minute */
	if (*ptr != ':') {
		return 0;
	}

	field = ++ptr;
	ptr = ts_span_digits(ptr, end);
	if (ptr - field != 2) {
		return 0;
	}

	/* skip following space */
	ptr = ts_span_blank(ptr, end);
	return (ptr == end) ? 1 : 0;
}
/* }}} */
749
750
751 /* {{{ eval_sexagesimal_l()
752 * Convert a base 60 number to a long
753 */
eval_sexagesimal_l(zend_long lval,const char * sg,const char * eos)754 static zend_long eval_sexagesimal_l(zend_long lval, const char *sg, const char *eos)
755 {
756 const char *ep;
757
758 while (sg < eos && (*sg < '0' || *sg > '9')) {
759 sg++;
760 }
761
762 ep = sg;
763 while (ep < eos && *ep >= '0' && *ep <= '9') {
764 ep++;
765 }
766
767 if (sg == eos) {
768 return lval;
769 }
770
771 return eval_sexagesimal_l(
772 lval * 60 + ZEND_STRTOL(sg, (char **) NULL, 10), ep, eos);
773 }
774 /* }}} */
775
776
/* {{{ eval_sexagesimal_d()
 * Convert a base 60 number to a double
 *
 * Walks the digit groups found in [sg, eos) from left to right; every
 * group multiplies the running total by 60 before being added. The walk
 * stops at the end of the range or at a `.` that starts a group position.
 * Each group is converted with zend_strtod() from its first digit, so a
 * fraction directly following the last group is presumably consumed by
 * that conversion - confirm against zend_strtod(). `dval` is the starting
 * total.
 */
static double eval_sexagesimal_d(double dval, const char *sg, const char *eos)
{
	const char *group_end;

	for (;;) {
		/* step over separators, but never over a '.' */
		while (sg < eos && *sg != '.' && !(*sg >= '0' && *sg <= '9')) {
			sg++;
		}

		/* no further group */
		if (sg == eos || *sg == '.') {
			return dval;
		}

		/* find where the current run of digits stops */
		group_end = sg;
		while (group_end < eos && *group_end >= '0' && *group_end <= '9') {
			group_end++;
		}

		dval = dval * 60.0 + zend_strtod(sg, (const char **) NULL);
		sg = group_end;
	}
}
/* }}} */
801
802
803
804 /*
805 * Local variables:
806 * tab-width: 4
807 * c-basic-offset: 4
808 * End:
809 * vim600: noet sw=4 ts=4 fdm=marker
810 * vim<600: noet sw=4 ts=4
811 */
812