1 /*------------------------------------------------------------------------- 2 * 3 * json.c 4 * JSON data type support. 5 * 6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * IDENTIFICATION 10 * src/backend/utils/adt/json.c 11 * 12 *------------------------------------------------------------------------- 13 */ 14 #include "postgres.h" 15 16 #include "access/htup_details.h" 17 #include "access/transam.h" 18 #include "catalog/pg_type.h" 19 #include "executor/spi.h" 20 #include "funcapi.h" 21 #include "lib/stringinfo.h" 22 #include "libpq/pqformat.h" 23 #include "mb/pg_wchar.h" 24 #include "miscadmin.h" 25 #include "parser/parse_coerce.h" 26 #include "utils/array.h" 27 #include "utils/builtins.h" 28 #include "utils/date.h" 29 #include "utils/datetime.h" 30 #include "utils/lsyscache.h" 31 #include "utils/json.h" 32 #include "utils/jsonapi.h" 33 #include "utils/typcache.h" 34 #include "utils/syscache.h" 35 36 /* 37 * The context of the parser is maintained by the recursive descent 38 * mechanism, but is passed explicitly to the error reporting routine 39 * for better diagnostics. 40 */ 41 typedef enum /* contexts of JSON parser */ 42 { 43 JSON_PARSE_VALUE, /* expecting a value */ 44 JSON_PARSE_STRING, /* expecting a string (for a field name) */ 45 JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */ 46 JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */ 47 JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */ 48 JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */ 49 JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */ 50 JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */ 51 JSON_PARSE_END /* saw the end of a document, expect nothing */ 52 } JsonParseContext; 53 54 typedef enum /* type categories for datum_to_json */ 55 { 56 JSONTYPE_NULL, /* null, so we didn't bother to identify */ 57 JSONTYPE_BOOL, /* boolean (built-in types only) */ 58 JSONTYPE_NUMERIC, /* numeric (ditto) */ 59 JSONTYPE_DATE, /* we use special formatting for datetimes */ 60 JSONTYPE_TIMESTAMP, 61 JSONTYPE_TIMESTAMPTZ, 62 JSONTYPE_JSON, /* JSON itself (and JSONB) */ 63 JSONTYPE_ARRAY, /* array */ 64 JSONTYPE_COMPOSITE, /* composite */ 65 JSONTYPE_CAST, /* something with an explicit cast to JSON */ 66 JSONTYPE_OTHER /* all else */ 67 } JsonTypeCategory; 68 69 typedef struct JsonAggState 70 { 71 StringInfo str; 72 JsonTypeCategory key_category; 73 Oid key_output_func; 74 JsonTypeCategory val_category; 75 Oid val_output_func; 76 } JsonAggState; 77 78 static inline void json_lex(JsonLexContext *lex); 79 static inline void json_lex_string(JsonLexContext *lex); 80 static inline void json_lex_number(JsonLexContext *lex, char *s, 81 bool *num_err, int *total_len); 82 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction *sem); 83 static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem); 84 static void parse_object(JsonLexContext *lex, JsonSemAction *sem); 85 static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem); 86 static void parse_array(JsonLexContext *lex, JsonSemAction *sem); 87 static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex) pg_attribute_noreturn(); 88 static void report_invalid_token(JsonLexContext *lex) pg_attribute_noreturn(); 89 static int report_json_context(JsonLexContext *lex); 90 static char *extract_mb_char(char *s); 91 static void composite_to_json(Datum composite, StringInfo result, 92 bool use_line_feeds); 93 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, 94 Datum *vals, bool *nulls, int *valcount, 95 JsonTypeCategory tcategory, Oid outfuncoid, 96 bool use_line_feeds); 97 static void array_to_json_internal(Datum array, StringInfo result, 98 bool use_line_feeds); 99 static void json_categorize_type(Oid typoid, 100 JsonTypeCategory *tcategory, 101 Oid *outfuncoid); 102 static void datum_to_json(Datum val, bool is_null, StringInfo result, 103 JsonTypeCategory tcategory, Oid outfuncoid, 104 bool key_scalar); 105 static void add_json(Datum val, bool is_null, StringInfo result, 106 Oid val_type, bool key_scalar); 107 static text *catenate_stringinfo_string(StringInfo buffer, const char *addon); 108 109 /* the null action object used for pure validation */ 110 static JsonSemAction nullSemAction = 111 { 112 NULL, NULL, NULL, NULL, NULL, 113 NULL, NULL, NULL, NULL, NULL 114 }; 115 116 /* Recursive Descent parser support routines */ 117 118 /* 119 * lex_peek 120 * 121 * what is the current look_ahead token? 122 */ 123 static inline JsonTokenType 124 lex_peek(JsonLexContext *lex) 125 { 126 return lex->token_type; 127 } 128 129 /* 130 * lex_accept 131 * 132 * accept the look_ahead token and move the lexer to the next token if the 133 * look_ahead token matches the token parameter. In that case, and if required, 134 * also hand back the de-escaped lexeme. 135 * 136 * returns true if the token matched, false otherwise. 137 */ 138 static inline bool 139 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme) 140 { 141 if (lex->token_type == token) 142 { 143 if (lexeme != NULL) 144 { 145 if (lex->token_type == JSON_TOKEN_STRING) 146 { 147 if (lex->strval != NULL) 148 *lexeme = pstrdup(lex->strval->data); 149 } 150 else 151 { 152 int len = (lex->token_terminator - lex->token_start); 153 char *tokstr = palloc(len + 1); 154 155 memcpy(tokstr, lex->token_start, len); 156 tokstr[len] = '\0'; 157 *lexeme = tokstr; 158 } 159 } 160 json_lex(lex); 161 return true; 162 } 163 return false; 164 } 165 166 /* 167 * lex_accept 168 * 169 * move the lexer to the next token if the current look_ahead token matches 170 * the parameter token. Otherwise, report an error. 171 */ 172 static inline void 173 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) 174 { 175 if (!lex_accept(lex, token, NULL)) 176 report_parse_error(ctx, lex); 177 } 178 179 /* chars to consider as part of an alphanumeric token */ 180 #define JSON_ALPHANUMERIC_CHAR(c) \ 181 (((c) >= 'a' && (c) <= 'z') || \ 182 ((c) >= 'A' && (c) <= 'Z') || \ 183 ((c) >= '0' && (c) <= '9') || \ 184 (c) == '_' || \ 185 IS_HIGHBIT_SET(c)) 186 187 /* 188 * Utility function to check if a string is a valid JSON number. 189 * 190 * str is of length len, and need not be null-terminated. 191 */ 192 bool 193 IsValidJsonNumber(const char *str, int len) 194 { 195 bool numeric_error; 196 int total_len; 197 JsonLexContext dummy_lex; 198 199 if (len <= 0) 200 return false; 201 202 /* 203 * json_lex_number expects a leading '-' to have been eaten already. 204 * 205 * having to cast away the constness of str is ugly, but there's not much 206 * easy alternative. 207 */ 208 if (*str == '-') 209 { 210 dummy_lex.input = unconstify(char *, str) +1; 211 dummy_lex.input_length = len - 1; 212 } 213 else 214 { 215 dummy_lex.input = unconstify(char *, str); 216 dummy_lex.input_length = len; 217 } 218 219 json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len); 220 221 return (!numeric_error) && (total_len == dummy_lex.input_length); 222 } 223 224 /* 225 * Input. 226 */ 227 Datum 228 json_in(PG_FUNCTION_ARGS) 229 { 230 char *json = PG_GETARG_CSTRING(0); 231 text *result = cstring_to_text(json); 232 JsonLexContext *lex; 233 234 /* validate it */ 235 lex = makeJsonLexContext(result, false); 236 pg_parse_json(lex, &nullSemAction); 237 238 /* Internal representation is the same as text, for now */ 239 PG_RETURN_TEXT_P(result); 240 } 241 242 /* 243 * Output. 244 */ 245 Datum 246 json_out(PG_FUNCTION_ARGS) 247 { 248 /* we needn't detoast because text_to_cstring will handle that */ 249 Datum txt = PG_GETARG_DATUM(0); 250 251 PG_RETURN_CSTRING(TextDatumGetCString(txt)); 252 } 253 254 /* 255 * Binary send. 256 */ 257 Datum 258 json_send(PG_FUNCTION_ARGS) 259 { 260 text *t = PG_GETARG_TEXT_PP(0); 261 StringInfoData buf; 262 263 pq_begintypsend(&buf); 264 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); 265 PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); 266 } 267 268 /* 269 * Binary receive. 270 */ 271 Datum 272 json_recv(PG_FUNCTION_ARGS) 273 { 274 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); 275 char *str; 276 int nbytes; 277 JsonLexContext *lex; 278 279 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); 280 281 /* Validate it. */ 282 lex = makeJsonLexContextCstringLen(str, nbytes, false); 283 pg_parse_json(lex, &nullSemAction); 284 285 PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes)); 286 } 287 288 /* 289 * makeJsonLexContext 290 * 291 * lex constructor, with or without StringInfo object 292 * for de-escaped lexemes. 293 * 294 * Without is better as it makes the processing faster, so only make one 295 * if really required. 296 * 297 * If you already have the json as a text* value, use the first of these 298 * functions, otherwise use makeJsonLexContextCstringLen(). 299 */ 300 JsonLexContext * 301 makeJsonLexContext(text *json, bool need_escapes) 302 { 303 return makeJsonLexContextCstringLen(VARDATA_ANY(json), 304 VARSIZE_ANY_EXHDR(json), 305 need_escapes); 306 } 307 308 JsonLexContext * 309 makeJsonLexContextCstringLen(char *json, int len, bool need_escapes) 310 { 311 JsonLexContext *lex = palloc0(sizeof(JsonLexContext)); 312 313 lex->input = lex->token_terminator = lex->line_start = json; 314 lex->line_number = 1; 315 lex->input_length = len; 316 if (need_escapes) 317 lex->strval = makeStringInfo(); 318 return lex; 319 } 320 321 /* 322 * pg_parse_json 323 * 324 * Publicly visible entry point for the JSON parser. 325 * 326 * lex is a lexing context, set up for the json to be processed by calling 327 * makeJsonLexContext(). sem is a structure of function pointers to semantic 328 * action routines to be called at appropriate spots during parsing, and a 329 * pointer to a state object to be passed to those routines. 330 */ 331 void 332 pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) 333 { 334 JsonTokenType tok; 335 336 /* get the initial token */ 337 json_lex(lex); 338 339 tok = lex_peek(lex); 340 341 /* parse by recursive descent */ 342 switch (tok) 343 { 344 case JSON_TOKEN_OBJECT_START: 345 parse_object(lex, sem); 346 break; 347 case JSON_TOKEN_ARRAY_START: 348 parse_array(lex, sem); 349 break; 350 default: 351 parse_scalar(lex, sem); /* json can be a bare scalar */ 352 } 353 354 lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END); 355 356 } 357 358 /* 359 * json_count_array_elements 360 * 361 * Returns number of array elements in lex context at start of array token 362 * until end of array token at same nesting level. 363 * 364 * Designed to be called from array_start routines. 365 */ 366 int 367 json_count_array_elements(JsonLexContext *lex) 368 { 369 JsonLexContext copylex; 370 int count; 371 372 /* 373 * It's safe to do this with a shallow copy because the lexical routines 374 * don't scribble on the input. They do scribble on the other pointers 375 * etc, so doing this with a copy makes that safe. 376 */ 377 memcpy(©lex, lex, sizeof(JsonLexContext)); 378 copylex.strval = NULL; /* not interested in values here */ 379 copylex.lex_level++; 380 381 count = 0; 382 lex_expect(JSON_PARSE_ARRAY_START, ©lex, JSON_TOKEN_ARRAY_START); 383 if (lex_peek(©lex) != JSON_TOKEN_ARRAY_END) 384 { 385 do 386 { 387 count++; 388 parse_array_element(©lex, &nullSemAction); 389 } 390 while (lex_accept(©lex, JSON_TOKEN_COMMA, NULL)); 391 } 392 lex_expect(JSON_PARSE_ARRAY_NEXT, ©lex, JSON_TOKEN_ARRAY_END); 393 394 return count; 395 } 396 397 /* 398 * Recursive Descent parse routines. There is one for each structural 399 * element in a json document: 400 * - scalar (string, number, true, false, null) 401 * - array ( [ ] ) 402 * - array element 403 * - object ( { } ) 404 * - object field 405 */ 406 static inline void 407 parse_scalar(JsonLexContext *lex, JsonSemAction *sem) 408 { 409 char *val = NULL; 410 json_scalar_action sfunc = sem->scalar; 411 char **valaddr; 412 JsonTokenType tok = lex_peek(lex); 413 414 valaddr = sfunc == NULL ? NULL : &val; 415 416 /* a scalar must be a string, a number, true, false, or null */ 417 switch (tok) 418 { 419 case JSON_TOKEN_TRUE: 420 lex_accept(lex, JSON_TOKEN_TRUE, valaddr); 421 break; 422 case JSON_TOKEN_FALSE: 423 lex_accept(lex, JSON_TOKEN_FALSE, valaddr); 424 break; 425 case JSON_TOKEN_NULL: 426 lex_accept(lex, JSON_TOKEN_NULL, valaddr); 427 break; 428 case JSON_TOKEN_NUMBER: 429 lex_accept(lex, JSON_TOKEN_NUMBER, valaddr); 430 break; 431 case JSON_TOKEN_STRING: 432 lex_accept(lex, JSON_TOKEN_STRING, valaddr); 433 break; 434 default: 435 report_parse_error(JSON_PARSE_VALUE, lex); 436 } 437 438 if (sfunc != NULL) 439 (*sfunc) (sem->semstate, val, tok); 440 } 441 442 static void 443 parse_object_field(JsonLexContext *lex, JsonSemAction *sem) 444 { 445 /* 446 * An object field is "fieldname" : value where value can be a scalar, 447 * object or array. Note: in user-facing docs and error messages, we 448 * generally call a field name a "key". 449 */ 450 451 char *fname = NULL; /* keep compiler quiet */ 452 json_ofield_action ostart = sem->object_field_start; 453 json_ofield_action oend = sem->object_field_end; 454 bool isnull; 455 char **fnameaddr = NULL; 456 JsonTokenType tok; 457 458 if (ostart != NULL || oend != NULL) 459 fnameaddr = &fname; 460 461 if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr)) 462 report_parse_error(JSON_PARSE_STRING, lex); 463 464 lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON); 465 466 tok = lex_peek(lex); 467 isnull = tok == JSON_TOKEN_NULL; 468 469 if (ostart != NULL) 470 (*ostart) (sem->semstate, fname, isnull); 471 472 switch (tok) 473 { 474 case JSON_TOKEN_OBJECT_START: 475 parse_object(lex, sem); 476 break; 477 case JSON_TOKEN_ARRAY_START: 478 parse_array(lex, sem); 479 break; 480 default: 481 parse_scalar(lex, sem); 482 } 483 484 if (oend != NULL) 485 (*oend) (sem->semstate, fname, isnull); 486 } 487 488 static void 489 parse_object(JsonLexContext *lex, JsonSemAction *sem) 490 { 491 /* 492 * an object is a possibly empty sequence of object fields, separated by 493 * commas and surrounded by curly braces. 494 */ 495 json_struct_action ostart = sem->object_start; 496 json_struct_action oend = sem->object_end; 497 JsonTokenType tok; 498 499 check_stack_depth(); 500 501 if (ostart != NULL) 502 (*ostart) (sem->semstate); 503 504 /* 505 * Data inside an object is at a higher nesting level than the object 506 * itself. Note that we increment this after we call the semantic routine 507 * for the object start and restore it before we call the routine for the 508 * object end. 509 */ 510 lex->lex_level++; 511 512 /* we know this will succeed, just clearing the token */ 513 lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START); 514 515 tok = lex_peek(lex); 516 switch (tok) 517 { 518 case JSON_TOKEN_STRING: 519 parse_object_field(lex, sem); 520 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL)) 521 parse_object_field(lex, sem); 522 break; 523 case JSON_TOKEN_OBJECT_END: 524 break; 525 default: 526 /* case of an invalid initial token inside the object */ 527 report_parse_error(JSON_PARSE_OBJECT_START, lex); 528 } 529 530 lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END); 531 532 lex->lex_level--; 533 534 if (oend != NULL) 535 (*oend) (sem->semstate); 536 } 537 538 static void 539 parse_array_element(JsonLexContext *lex, JsonSemAction *sem) 540 { 541 json_aelem_action astart = sem->array_element_start; 542 json_aelem_action aend = sem->array_element_end; 543 JsonTokenType tok = lex_peek(lex); 544 545 bool isnull; 546 547 isnull = tok == JSON_TOKEN_NULL; 548 549 if (astart != NULL) 550 (*astart) (sem->semstate, isnull); 551 552 /* an array element is any object, array or scalar */ 553 switch (tok) 554 { 555 case JSON_TOKEN_OBJECT_START: 556 parse_object(lex, sem); 557 break; 558 case JSON_TOKEN_ARRAY_START: 559 parse_array(lex, sem); 560 break; 561 default: 562 parse_scalar(lex, sem); 563 } 564 565 if (aend != NULL) 566 (*aend) (sem->semstate, isnull); 567 } 568 569 static void 570 parse_array(JsonLexContext *lex, JsonSemAction *sem) 571 { 572 /* 573 * an array is a possibly empty sequence of array elements, separated by 574 * commas and surrounded by square brackets. 575 */ 576 json_struct_action astart = sem->array_start; 577 json_struct_action aend = sem->array_end; 578 579 check_stack_depth(); 580 581 if (astart != NULL) 582 (*astart) (sem->semstate); 583 584 /* 585 * Data inside an array is at a higher nesting level than the array 586 * itself. Note that we increment this after we call the semantic routine 587 * for the array start and restore it before we call the routine for the 588 * array end. 589 */ 590 lex->lex_level++; 591 592 lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START); 593 if (lex_peek(lex) != JSON_TOKEN_ARRAY_END) 594 { 595 596 parse_array_element(lex, sem); 597 598 while (lex_accept(lex, JSON_TOKEN_COMMA, NULL)) 599 parse_array_element(lex, sem); 600 } 601 602 lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END); 603 604 lex->lex_level--; 605 606 if (aend != NULL) 607 (*aend) (sem->semstate); 608 } 609 610 /* 611 * Lex one token from the input stream. 612 */ 613 static inline void 614 json_lex(JsonLexContext *lex) 615 { 616 char *s; 617 int len; 618 619 /* Skip leading whitespace. */ 620 s = lex->token_terminator; 621 len = s - lex->input; 622 while (len < lex->input_length && 623 (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')) 624 { 625 if (*s == '\n') 626 ++lex->line_number; 627 ++s; 628 ++len; 629 } 630 lex->token_start = s; 631 632 /* Determine token type. */ 633 if (len >= lex->input_length) 634 { 635 lex->token_start = NULL; 636 lex->prev_token_terminator = lex->token_terminator; 637 lex->token_terminator = s; 638 lex->token_type = JSON_TOKEN_END; 639 } 640 else 641 switch (*s) 642 { 643 /* Single-character token, some kind of punctuation mark. */ 644 case '{': 645 lex->prev_token_terminator = lex->token_terminator; 646 lex->token_terminator = s + 1; 647 lex->token_type = JSON_TOKEN_OBJECT_START; 648 break; 649 case '}': 650 lex->prev_token_terminator = lex->token_terminator; 651 lex->token_terminator = s + 1; 652 lex->token_type = JSON_TOKEN_OBJECT_END; 653 break; 654 case '[': 655 lex->prev_token_terminator = lex->token_terminator; 656 lex->token_terminator = s + 1; 657 lex->token_type = JSON_TOKEN_ARRAY_START; 658 break; 659 case ']': 660 lex->prev_token_terminator = lex->token_terminator; 661 lex->token_terminator = s + 1; 662 lex->token_type = JSON_TOKEN_ARRAY_END; 663 break; 664 case ',': 665 lex->prev_token_terminator = lex->token_terminator; 666 lex->token_terminator = s + 1; 667 lex->token_type = JSON_TOKEN_COMMA; 668 break; 669 case ':': 670 lex->prev_token_terminator = lex->token_terminator; 671 lex->token_terminator = s + 1; 672 lex->token_type = JSON_TOKEN_COLON; 673 break; 674 case '"': 675 /* string */ 676 json_lex_string(lex); 677 lex->token_type = JSON_TOKEN_STRING; 678 break; 679 case '-': 680 /* Negative number. */ 681 json_lex_number(lex, s + 1, NULL, NULL); 682 lex->token_type = JSON_TOKEN_NUMBER; 683 break; 684 case '0': 685 case '1': 686 case '2': 687 case '3': 688 case '4': 689 case '5': 690 case '6': 691 case '7': 692 case '8': 693 case '9': 694 /* Positive number. */ 695 json_lex_number(lex, s, NULL, NULL); 696 lex->token_type = JSON_TOKEN_NUMBER; 697 break; 698 default: 699 { 700 char *p; 701 702 /* 703 * We're not dealing with a string, number, legal 704 * punctuation mark, or end of string. The only legal 705 * tokens we might find here are true, false, and null, 706 * but for error reporting purposes we scan until we see a 707 * non-alphanumeric character. That way, we can report 708 * the whole word as an unexpected token, rather than just 709 * some unintuitive prefix thereof. 710 */ 711 for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++) 712 /* skip */ ; 713 714 /* 715 * We got some sort of unexpected punctuation or an 716 * otherwise unexpected character, so just complain about 717 * that one character. 718 */ 719 if (p == s) 720 { 721 lex->prev_token_terminator = lex->token_terminator; 722 lex->token_terminator = s + 1; 723 report_invalid_token(lex); 724 } 725 726 /* 727 * We've got a real alphanumeric token here. If it 728 * happens to be true, false, or null, all is well. If 729 * not, error out. 730 */ 731 lex->prev_token_terminator = lex->token_terminator; 732 lex->token_terminator = p; 733 if (p - s == 4) 734 { 735 if (memcmp(s, "true", 4) == 0) 736 lex->token_type = JSON_TOKEN_TRUE; 737 else if (memcmp(s, "null", 4) == 0) 738 lex->token_type = JSON_TOKEN_NULL; 739 else 740 report_invalid_token(lex); 741 } 742 else if (p - s == 5 && memcmp(s, "false", 5) == 0) 743 lex->token_type = JSON_TOKEN_FALSE; 744 else 745 report_invalid_token(lex); 746 747 } 748 } /* end of switch */ 749 } 750 751 /* 752 * The next token in the input stream is known to be a string; lex it. 753 */ 754 static inline void 755 json_lex_string(JsonLexContext *lex) 756 { 757 char *s; 758 int len; 759 int hi_surrogate = -1; 760 761 if (lex->strval != NULL) 762 resetStringInfo(lex->strval); 763 764 Assert(lex->input_length > 0); 765 s = lex->token_start; 766 len = lex->token_start - lex->input; 767 for (;;) 768 { 769 s++; 770 len++; 771 /* Premature end of the string. */ 772 if (len >= lex->input_length) 773 { 774 lex->token_terminator = s; 775 report_invalid_token(lex); 776 } 777 else if (*s == '"') 778 break; 779 else if ((unsigned char) *s < 32) 780 { 781 /* Per RFC4627, these characters MUST be escaped. */ 782 /* Since *s isn't printable, exclude it from the context string */ 783 lex->token_terminator = s; 784 ereport(ERROR, 785 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 786 errmsg("invalid input syntax for type %s", "json"), 787 errdetail("Character with value 0x%02x must be escaped.", 788 (unsigned char) *s), 789 report_json_context(lex))); 790 } 791 else if (*s == '\\') 792 { 793 /* OK, we have an escape character. */ 794 s++; 795 len++; 796 if (len >= lex->input_length) 797 { 798 lex->token_terminator = s; 799 report_invalid_token(lex); 800 } 801 else if (*s == 'u') 802 { 803 int i; 804 int ch = 0; 805 806 for (i = 1; i <= 4; i++) 807 { 808 s++; 809 len++; 810 if (len >= lex->input_length) 811 { 812 lex->token_terminator = s; 813 report_invalid_token(lex); 814 } 815 else if (*s >= '0' && *s <= '9') 816 ch = (ch * 16) + (*s - '0'); 817 else if (*s >= 'a' && *s <= 'f') 818 ch = (ch * 16) + (*s - 'a') + 10; 819 else if (*s >= 'A' && *s <= 'F') 820 ch = (ch * 16) + (*s - 'A') + 10; 821 else 822 { 823 lex->token_terminator = s + pg_mblen(s); 824 ereport(ERROR, 825 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 826 errmsg("invalid input syntax for type %s", 827 "json"), 828 errdetail("\"\\u\" must be followed by four hexadecimal digits."), 829 report_json_context(lex))); 830 } 831 } 832 if (lex->strval != NULL) 833 { 834 char utf8str[5]; 835 int utf8len; 836 837 if (ch >= 0xd800 && ch <= 0xdbff) 838 { 839 if (hi_surrogate != -1) 840 ereport(ERROR, 841 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 842 errmsg("invalid input syntax for type %s", 843 "json"), 844 errdetail("Unicode high surrogate must not follow a high surrogate."), 845 report_json_context(lex))); 846 hi_surrogate = (ch & 0x3ff) << 10; 847 continue; 848 } 849 else if (ch >= 0xdc00 && ch <= 0xdfff) 850 { 851 if (hi_surrogate == -1) 852 ereport(ERROR, 853 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 854 errmsg("invalid input syntax for type %s", "json"), 855 errdetail("Unicode low surrogate must follow a high surrogate."), 856 report_json_context(lex))); 857 ch = 0x10000 + hi_surrogate + (ch & 0x3ff); 858 hi_surrogate = -1; 859 } 860 861 if (hi_surrogate != -1) 862 ereport(ERROR, 863 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 864 errmsg("invalid input syntax for type %s", "json"), 865 errdetail("Unicode low surrogate must follow a high surrogate."), 866 report_json_context(lex))); 867 868 /* 869 * For UTF8, replace the escape sequence by the actual 870 * utf8 character in lex->strval. Do this also for other 871 * encodings if the escape designates an ASCII character, 872 * otherwise raise an error. 873 */ 874 875 if (ch == 0) 876 { 877 /* We can't allow this, since our TEXT type doesn't */ 878 ereport(ERROR, 879 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), 880 errmsg("unsupported Unicode escape sequence"), 881 errdetail("\\u0000 cannot be converted to text."), 882 report_json_context(lex))); 883 } 884 else if (GetDatabaseEncoding() == PG_UTF8) 885 { 886 unicode_to_utf8(ch, (unsigned char *) utf8str); 887 utf8len = pg_utf_mblen((unsigned char *) utf8str); 888 appendBinaryStringInfo(lex->strval, utf8str, utf8len); 889 } 890 else if (ch <= 0x007f) 891 { 892 /* 893 * This is the only way to designate things like a 894 * form feed character in JSON, so it's useful in all 895 * encodings. 896 */ 897 appendStringInfoChar(lex->strval, (char) ch); 898 } 899 else 900 { 901 ereport(ERROR, 902 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), 903 errmsg("unsupported Unicode escape sequence"), 904 errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), 905 report_json_context(lex))); 906 } 907 908 } 909 } 910 else if (lex->strval != NULL) 911 { 912 if (hi_surrogate != -1) 913 ereport(ERROR, 914 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 915 errmsg("invalid input syntax for type %s", 916 "json"), 917 errdetail("Unicode low surrogate must follow a high surrogate."), 918 report_json_context(lex))); 919 920 switch (*s) 921 { 922 case '"': 923 case '\\': 924 case '/': 925 appendStringInfoChar(lex->strval, *s); 926 break; 927 case 'b': 928 appendStringInfoChar(lex->strval, '\b'); 929 break; 930 case 'f': 931 appendStringInfoChar(lex->strval, '\f'); 932 break; 933 case 'n': 934 appendStringInfoChar(lex->strval, '\n'); 935 break; 936 case 'r': 937 appendStringInfoChar(lex->strval, '\r'); 938 break; 939 case 't': 940 appendStringInfoChar(lex->strval, '\t'); 941 break; 942 default: 943 /* Not a valid string escape, so error out. */ 944 lex->token_terminator = s + pg_mblen(s); 945 ereport(ERROR, 946 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 947 errmsg("invalid input syntax for type %s", 948 "json"), 949 errdetail("Escape sequence \"\\%s\" is invalid.", 950 extract_mb_char(s)), 951 report_json_context(lex))); 952 } 953 } 954 else if (strchr("\"\\/bfnrt", *s) == NULL) 955 { 956 /* 957 * Simpler processing if we're not bothered about de-escaping 958 * 959 * It's very tempting to remove the strchr() call here and 960 * replace it with a switch statement, but testing so far has 961 * shown it's not a performance win. 962 */ 963 lex->token_terminator = s + pg_mblen(s); 964 ereport(ERROR, 965 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 966 errmsg("invalid input syntax for type %s", "json"), 967 errdetail("Escape sequence \"\\%s\" is invalid.", 968 extract_mb_char(s)), 969 report_json_context(lex))); 970 } 971 972 } 973 else if (lex->strval != NULL) 974 { 975 if (hi_surrogate != -1) 976 ereport(ERROR, 977 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 978 errmsg("invalid input syntax for type %s", "json"), 979 errdetail("Unicode low surrogate must follow a high surrogate."), 980 report_json_context(lex))); 981 982 appendStringInfoChar(lex->strval, *s); 983 } 984 985 } 986 987 if (hi_surrogate != -1) 988 ereport(ERROR, 989 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 990 errmsg("invalid input syntax for type %s", "json"), 991 errdetail("Unicode low surrogate must follow a high surrogate."), 992 report_json_context(lex))); 993 994 /* Hooray, we found the end of the string! */ 995 lex->prev_token_terminator = lex->token_terminator; 996 lex->token_terminator = s + 1; 997 } 998 999 /* 1000 * The next token in the input stream is known to be a number; lex it. 1001 * 1002 * In JSON, a number consists of four parts: 1003 * 1004 * (1) An optional minus sign ('-'). 1005 * 1006 * (2) Either a single '0', or a string of one or more digits that does not 1007 * begin with a '0'. 1008 * 1009 * (3) An optional decimal part, consisting of a period ('.') followed by 1010 * one or more digits. (Note: While this part can be omitted 1011 * completely, it's not OK to have only the decimal point without 1012 * any digits afterwards.) 1013 * 1014 * (4) An optional exponent part, consisting of 'e' or 'E', optionally 1015 * followed by '+' or '-', followed by one or more digits. (Note: 1016 * As with the decimal part, if 'e' or 'E' is present, it must be 1017 * followed by at least one digit.) 1018 * 1019 * The 's' argument to this function points to the ostensible beginning 1020 * of part 2 - i.e. the character after any optional minus sign, or the 1021 * first character of the string if there is none. 1022 * 1023 * If num_err is not NULL, we return an error flag to *num_err rather than 1024 * raising an error for a badly-formed number. Also, if total_len is not NULL 1025 * the distance from lex->input to the token end+1 is returned to *total_len. 1026 */ 1027 static inline void 1028 json_lex_number(JsonLexContext *lex, char *s, 1029 bool *num_err, int *total_len) 1030 { 1031 bool error = false; 1032 int len = s - lex->input; 1033 1034 /* Part (1): leading sign indicator. */ 1035 /* Caller already did this for us; so do nothing. */ 1036 1037 /* Part (2): parse main digit string. */ 1038 if (len < lex->input_length && *s == '0') 1039 { 1040 s++; 1041 len++; 1042 } 1043 else if (len < lex->input_length && *s >= '1' && *s <= '9') 1044 { 1045 do 1046 { 1047 s++; 1048 len++; 1049 } while (len < lex->input_length && *s >= '0' && *s <= '9'); 1050 } 1051 else 1052 error = true; 1053 1054 /* Part (3): parse optional decimal portion. */ 1055 if (len < lex->input_length && *s == '.') 1056 { 1057 s++; 1058 len++; 1059 if (len == lex->input_length || *s < '0' || *s > '9') 1060 error = true; 1061 else 1062 { 1063 do 1064 { 1065 s++; 1066 len++; 1067 } while (len < lex->input_length && *s >= '0' && *s <= '9'); 1068 } 1069 } 1070 1071 /* Part (4): parse optional exponent. */ 1072 if (len < lex->input_length && (*s == 'e' || *s == 'E')) 1073 { 1074 s++; 1075 len++; 1076 if (len < lex->input_length && (*s == '+' || *s == '-')) 1077 { 1078 s++; 1079 len++; 1080 } 1081 if (len == lex->input_length || *s < '0' || *s > '9') 1082 error = true; 1083 else 1084 { 1085 do 1086 { 1087 s++; 1088 len++; 1089 } while (len < lex->input_length && *s >= '0' && *s <= '9'); 1090 } 1091 } 1092 1093 /* 1094 * Check for trailing garbage. As in json_lex(), any alphanumeric stuff 1095 * here should be considered part of the token for error-reporting 1096 * purposes. 1097 */ 1098 for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++) 1099 error = true; 1100 1101 if (total_len != NULL) 1102 *total_len = len; 1103 1104 if (num_err != NULL) 1105 { 1106 /* let the caller handle any error */ 1107 *num_err = error; 1108 } 1109 else 1110 { 1111 /* return token endpoint */ 1112 lex->prev_token_terminator = lex->token_terminator; 1113 lex->token_terminator = s; 1114 /* handle error if any */ 1115 if (error) 1116 report_invalid_token(lex); 1117 } 1118 } 1119 1120 /* 1121 * Report a parse error. 1122 * 1123 * lex->token_start and lex->token_terminator must identify the current token. 1124 */ 1125 static void 1126 report_parse_error(JsonParseContext ctx, JsonLexContext *lex) 1127 { 1128 char *token; 1129 int toklen; 1130 1131 /* Handle case where the input ended prematurely. */ 1132 if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END) 1133 ereport(ERROR, 1134 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1135 errmsg("invalid input syntax for type %s", "json"), 1136 errdetail("The input string ended unexpectedly."), 1137 report_json_context(lex))); 1138 1139 /* Separate out the current token. */ 1140 toklen = lex->token_terminator - lex->token_start; 1141 token = palloc(toklen + 1); 1142 memcpy(token, lex->token_start, toklen); 1143 token[toklen] = '\0'; 1144 1145 /* Complain, with the appropriate detail message. */ 1146 if (ctx == JSON_PARSE_END) 1147 ereport(ERROR, 1148 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1149 errmsg("invalid input syntax for type %s", "json"), 1150 errdetail("Expected end of input, but found \"%s\".", 1151 token), 1152 report_json_context(lex))); 1153 else 1154 { 1155 switch (ctx) 1156 { 1157 case JSON_PARSE_VALUE: 1158 ereport(ERROR, 1159 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1160 errmsg("invalid input syntax for type %s", "json"), 1161 errdetail("Expected JSON value, but found \"%s\".", 1162 token), 1163 report_json_context(lex))); 1164 break; 1165 case JSON_PARSE_STRING: 1166 ereport(ERROR, 1167 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1168 errmsg("invalid input syntax for type %s", "json"), 1169 errdetail("Expected string, but found \"%s\".", 1170 token), 1171 report_json_context(lex))); 1172 break; 1173 case JSON_PARSE_ARRAY_START: 1174 ereport(ERROR, 1175 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1176 errmsg("invalid input syntax for type %s", "json"), 1177 errdetail("Expected array element or \"]\", but found \"%s\".", 1178 token), 1179 report_json_context(lex))); 1180 break; 1181 case JSON_PARSE_ARRAY_NEXT: 1182 ereport(ERROR, 1183 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1184 errmsg("invalid input syntax for type %s", "json"), 1185 errdetail("Expected \",\" or \"]\", but found \"%s\".", 1186 token), 1187 report_json_context(lex))); 1188 break; 1189 case JSON_PARSE_OBJECT_START: 1190 ereport(ERROR, 1191 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1192 errmsg("invalid input syntax for type %s", "json"), 1193 errdetail("Expected string or \"}\", but found \"%s\".", 1194 token), 1195 report_json_context(lex))); 1196 break; 1197 case JSON_PARSE_OBJECT_LABEL: 1198 ereport(ERROR, 1199 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1200 errmsg("invalid input syntax for type %s", "json"), 1201 errdetail("Expected \":\", but found \"%s\".", 1202 token), 1203 report_json_context(lex))); 1204 break; 1205 case JSON_PARSE_OBJECT_NEXT: 1206 ereport(ERROR, 1207 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1208 errmsg("invalid input syntax for type %s", "json"), 1209 errdetail("Expected \",\" or \"}\", but found \"%s\".", 1210 token), 1211 report_json_context(lex))); 1212 break; 1213 case JSON_PARSE_OBJECT_COMMA: 1214 ereport(ERROR, 1215 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1216 errmsg("invalid input syntax for type %s", "json"), 1217 errdetail("Expected string, but found \"%s\".", 1218 token), 1219 report_json_context(lex))); 1220 break; 1221 default: 1222 elog(ERROR, "unexpected json parse state: %d", ctx); 1223 } 1224 } 1225 } 1226 1227 /* 1228 * Report an invalid input token. 1229 * 1230 * lex->token_start and lex->token_terminator must identify the token. 1231 */ 1232 static void 1233 report_invalid_token(JsonLexContext *lex) 1234 { 1235 char *token; 1236 int toklen; 1237 1238 /* Separate out the offending token. */ 1239 toklen = lex->token_terminator - lex->token_start; 1240 token = palloc(toklen + 1); 1241 memcpy(token, lex->token_start, toklen); 1242 token[toklen] = '\0'; 1243 1244 ereport(ERROR, 1245 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1246 errmsg("invalid input syntax for type %s", "json"), 1247 errdetail("Token \"%s\" is invalid.", token), 1248 report_json_context(lex))); 1249 } 1250 1251 /* 1252 * Report a CONTEXT line for bogus JSON input. 1253 * 1254 * lex->token_terminator must be set to identify the spot where we detected 1255 * the error. Note that lex->token_start might be NULL, in case we recognized 1256 * error at EOF. 1257 * 1258 * The return value isn't meaningful, but we make it non-void so that this 1259 * can be invoked inside ereport(). 1260 */ 1261 static int 1262 report_json_context(JsonLexContext *lex) 1263 { 1264 const char *context_start; 1265 const char *context_end; 1266 const char *line_start; 1267 int line_number; 1268 char *ctxt; 1269 int ctxtlen; 1270 const char *prefix; 1271 const char *suffix; 1272 1273 /* Choose boundaries for the part of the input we will display */ 1274 context_start = lex->input; 1275 context_end = lex->token_terminator; 1276 line_start = context_start; 1277 line_number = 1; 1278 for (;;) 1279 { 1280 /* Always advance over newlines */ 1281 if (context_start < context_end && *context_start == '\n') 1282 { 1283 context_start++; 1284 line_start = context_start; 1285 line_number++; 1286 continue; 1287 } 1288 /* Otherwise, done as soon as we are close enough to context_end */ 1289 if (context_end - context_start < 50) 1290 break; 1291 /* Advance to next multibyte character */ 1292 if (IS_HIGHBIT_SET(*context_start)) 1293 context_start += pg_mblen(context_start); 1294 else 1295 context_start++; 1296 } 1297 1298 /* 1299 * We add "..." to indicate that the excerpt doesn't start at the 1300 * beginning of the line ... but if we're within 3 characters of the 1301 * beginning of the line, we might as well just show the whole line. 1302 */ 1303 if (context_start - line_start <= 3) 1304 context_start = line_start; 1305 1306 /* Get a null-terminated copy of the data to present */ 1307 ctxtlen = context_end - context_start; 1308 ctxt = palloc(ctxtlen + 1); 1309 memcpy(ctxt, context_start, ctxtlen); 1310 ctxt[ctxtlen] = '\0'; 1311 1312 /* 1313 * Show the context, prefixing "..." if not starting at start of line, and 1314 * suffixing "..." if not ending at end of line. 1315 */ 1316 prefix = (context_start > line_start) ? "..." : ""; 1317 suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : ""; 1318 1319 return errcontext("JSON data, line %d: %s%s%s", 1320 line_number, prefix, ctxt, suffix); 1321 } 1322 1323 /* 1324 * Extract a single, possibly multi-byte char from the input string. 1325 */ 1326 static char * 1327 extract_mb_char(char *s) 1328 { 1329 char *res; 1330 int len; 1331 1332 len = pg_mblen(s); 1333 res = palloc(len + 1); 1334 memcpy(res, s, len); 1335 res[len] = '\0'; 1336 1337 return res; 1338 } 1339 1340 /* 1341 * Determine how we want to print values of a given type in datum_to_json. 1342 * 1343 * Given the datatype OID, return its JsonTypeCategory, as well as the type's 1344 * output function OID. If the returned category is JSONTYPE_CAST, we 1345 * return the OID of the type->JSON cast function instead. 1346 */ 1347 static void 1348 json_categorize_type(Oid typoid, 1349 JsonTypeCategory *tcategory, 1350 Oid *outfuncoid) 1351 { 1352 bool typisvarlena; 1353 1354 /* Look through any domain */ 1355 typoid = getBaseType(typoid); 1356 1357 *outfuncoid = InvalidOid; 1358 1359 /* 1360 * We need to get the output function for everything except date and 1361 * timestamp types, array and composite types, booleans, and non-builtin 1362 * types where there's a cast to json. 1363 */ 1364 1365 switch (typoid) 1366 { 1367 case BOOLOID: 1368 *tcategory = JSONTYPE_BOOL; 1369 break; 1370 1371 case INT2OID: 1372 case INT4OID: 1373 case INT8OID: 1374 case FLOAT4OID: 1375 case FLOAT8OID: 1376 case NUMERICOID: 1377 getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); 1378 *tcategory = JSONTYPE_NUMERIC; 1379 break; 1380 1381 case DATEOID: 1382 *tcategory = JSONTYPE_DATE; 1383 break; 1384 1385 case TIMESTAMPOID: 1386 *tcategory = JSONTYPE_TIMESTAMP; 1387 break; 1388 1389 case TIMESTAMPTZOID: 1390 *tcategory = JSONTYPE_TIMESTAMPTZ; 1391 break; 1392 1393 case JSONOID: 1394 case JSONBOID: 1395 getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); 1396 *tcategory = JSONTYPE_JSON; 1397 break; 1398 1399 default: 1400 /* Check for arrays and composites */ 1401 if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID 1402 || typoid == RECORDARRAYOID) 1403 *tcategory = JSONTYPE_ARRAY; 1404 else if (type_is_rowtype(typoid)) /* includes RECORDOID */ 1405 *tcategory = JSONTYPE_COMPOSITE; 1406 else 1407 { 1408 /* It's probably the general case ... */ 1409 *tcategory = JSONTYPE_OTHER; 1410 /* but let's look for a cast to json, if it's not built-in */ 1411 if (typoid >= FirstNormalObjectId) 1412 { 1413 Oid castfunc; 1414 CoercionPathType ctype; 1415 1416 ctype = find_coercion_pathway(JSONOID, typoid, 1417 COERCION_EXPLICIT, 1418 &castfunc); 1419 if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc)) 1420 { 1421 *tcategory = JSONTYPE_CAST; 1422 *outfuncoid = castfunc; 1423 } 1424 else 1425 { 1426 /* non builtin type with no cast */ 1427 getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); 1428 } 1429 } 1430 else 1431 { 1432 /* any other builtin type */ 1433 getTypeOutputInfo(typoid, outfuncoid, &typisvarlena); 1434 } 1435 } 1436 break; 1437 } 1438 } 1439 1440 /* 1441 * Turn a Datum into JSON text, appending the string to "result". 1442 * 1443 * tcategory and outfuncoid are from a previous call to json_categorize_type, 1444 * except that if is_null is true then they can be invalid. 1445 * 1446 * If key_scalar is true, the value is being printed as a key, so insist 1447 * it's of an acceptable type, and force it to be quoted. 1448 */ 1449 static void 1450 datum_to_json(Datum val, bool is_null, StringInfo result, 1451 JsonTypeCategory tcategory, Oid outfuncoid, 1452 bool key_scalar) 1453 { 1454 char *outputstr; 1455 text *jsontext; 1456 1457 check_stack_depth(); 1458 1459 /* callers are expected to ensure that null keys are not passed in */ 1460 Assert(!(key_scalar && is_null)); 1461 1462 if (is_null) 1463 { 1464 appendStringInfoString(result, "null"); 1465 return; 1466 } 1467 1468 if (key_scalar && 1469 (tcategory == JSONTYPE_ARRAY || 1470 tcategory == JSONTYPE_COMPOSITE || 1471 tcategory == JSONTYPE_JSON || 1472 tcategory == JSONTYPE_CAST)) 1473 ereport(ERROR, 1474 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 1475 errmsg("key value must be scalar, not array, composite, or json"))); 1476 1477 switch (tcategory) 1478 { 1479 case JSONTYPE_ARRAY: 1480 array_to_json_internal(val, result, false); 1481 break; 1482 case JSONTYPE_COMPOSITE: 1483 composite_to_json(val, result, false); 1484 break; 1485 case JSONTYPE_BOOL: 1486 outputstr = DatumGetBool(val) ? "true" : "false"; 1487 if (key_scalar) 1488 escape_json(result, outputstr); 1489 else 1490 appendStringInfoString(result, outputstr); 1491 break; 1492 case JSONTYPE_NUMERIC: 1493 outputstr = OidOutputFunctionCall(outfuncoid, val); 1494 1495 /* 1496 * Don't call escape_json for a non-key if it's a valid JSON 1497 * number. 1498 */ 1499 if (!key_scalar && IsValidJsonNumber(outputstr, strlen(outputstr))) 1500 appendStringInfoString(result, outputstr); 1501 else 1502 escape_json(result, outputstr); 1503 pfree(outputstr); 1504 break; 1505 case JSONTYPE_DATE: 1506 { 1507 char buf[MAXDATELEN + 1]; 1508 1509 JsonEncodeDateTime(buf, val, DATEOID); 1510 appendStringInfo(result, "\"%s\"", buf); 1511 } 1512 break; 1513 case JSONTYPE_TIMESTAMP: 1514 { 1515 char buf[MAXDATELEN + 1]; 1516 1517 JsonEncodeDateTime(buf, val, TIMESTAMPOID); 1518 appendStringInfo(result, "\"%s\"", buf); 1519 } 1520 break; 1521 case JSONTYPE_TIMESTAMPTZ: 1522 { 1523 char buf[MAXDATELEN + 1]; 1524 1525 JsonEncodeDateTime(buf, val, TIMESTAMPTZOID); 1526 appendStringInfo(result, "\"%s\"", buf); 1527 } 1528 break; 1529 case JSONTYPE_JSON: 1530 /* JSON and JSONB output will already be escaped */ 1531 outputstr = OidOutputFunctionCall(outfuncoid, val); 1532 appendStringInfoString(result, outputstr); 1533 pfree(outputstr); 1534 break; 1535 case JSONTYPE_CAST: 1536 /* outfuncoid refers to a cast function, not an output function */ 1537 jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val)); 1538 outputstr = text_to_cstring(jsontext); 1539 appendStringInfoString(result, outputstr); 1540 pfree(outputstr); 1541 pfree(jsontext); 1542 break; 1543 default: 1544 outputstr = OidOutputFunctionCall(outfuncoid, val); 1545 escape_json(result, outputstr); 1546 pfree(outputstr); 1547 break; 1548 } 1549 } 1550 1551 /* 1552 * Encode 'value' of datetime type 'typid' into JSON string in ISO format using 1553 * optionally preallocated buffer 'buf'. 1554 */ 1555 char * 1556 JsonEncodeDateTime(char *buf, Datum value, Oid typid) 1557 { 1558 if (!buf) 1559 buf = palloc(MAXDATELEN + 1); 1560 1561 switch (typid) 1562 { 1563 case DATEOID: 1564 { 1565 DateADT date; 1566 struct pg_tm tm; 1567 1568 date = DatumGetDateADT(value); 1569 1570 /* Same as date_out(), but forcing DateStyle */ 1571 if (DATE_NOT_FINITE(date)) 1572 EncodeSpecialDate(date, buf); 1573 else 1574 { 1575 j2date(date + POSTGRES_EPOCH_JDATE, 1576 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday)); 1577 EncodeDateOnly(&tm, USE_XSD_DATES, buf); 1578 } 1579 } 1580 break; 1581 case TIMEOID: 1582 { 1583 TimeADT time = DatumGetTimeADT(value); 1584 struct pg_tm tt, 1585 *tm = &tt; 1586 fsec_t fsec; 1587 1588 /* Same as time_out(), but forcing DateStyle */ 1589 time2tm(time, tm, &fsec); 1590 EncodeTimeOnly(tm, fsec, false, 0, USE_XSD_DATES, buf); 1591 } 1592 break; 1593 case TIMETZOID: 1594 { 1595 TimeTzADT *time = DatumGetTimeTzADTP(value); 1596 struct pg_tm tt, 1597 *tm = &tt; 1598 fsec_t fsec; 1599 int tz; 1600 1601 /* Same as timetz_out(), but forcing DateStyle */ 1602 timetz2tm(time, tm, &fsec, &tz); 1603 EncodeTimeOnly(tm, fsec, true, tz, USE_XSD_DATES, buf); 1604 } 1605 break; 1606 case TIMESTAMPOID: 1607 { 1608 Timestamp timestamp; 1609 struct pg_tm tm; 1610 fsec_t fsec; 1611 1612 timestamp = DatumGetTimestamp(value); 1613 /* Same as timestamp_out(), but forcing DateStyle */ 1614 if (TIMESTAMP_NOT_FINITE(timestamp)) 1615 EncodeSpecialTimestamp(timestamp, buf); 1616 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0) 1617 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf); 1618 else 1619 ereport(ERROR, 1620 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 1621 errmsg("timestamp out of range"))); 1622 } 1623 break; 1624 case TIMESTAMPTZOID: 1625 { 1626 TimestampTz timestamp; 1627 struct pg_tm tm; 1628 int tz; 1629 fsec_t fsec; 1630 const char *tzn = NULL; 1631 1632 timestamp = DatumGetTimestampTz(value); 1633 /* Same as timestamptz_out(), but forcing DateStyle */ 1634 if (TIMESTAMP_NOT_FINITE(timestamp)) 1635 EncodeSpecialTimestamp(timestamp, buf); 1636 else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0) 1637 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf); 1638 else 1639 ereport(ERROR, 1640 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 1641 errmsg("timestamp out of range"))); 1642 } 1643 break; 1644 default: 1645 elog(ERROR, "unknown jsonb value datetime type oid %d", typid); 1646 return NULL; 1647 } 1648 1649 return buf; 1650 } 1651 1652 /* 1653 * Process a single dimension of an array. 1654 * If it's the innermost dimension, output the values, otherwise call 1655 * ourselves recursively to process the next dimension. 1656 */ 1657 static void 1658 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, 1659 bool *nulls, int *valcount, JsonTypeCategory tcategory, 1660 Oid outfuncoid, bool use_line_feeds) 1661 { 1662 int i; 1663 const char *sep; 1664 1665 Assert(dim < ndims); 1666 1667 sep = use_line_feeds ? ",\n " : ","; 1668 1669 appendStringInfoChar(result, '['); 1670 1671 for (i = 1; i <= dims[dim]; i++) 1672 { 1673 if (i > 1) 1674 appendStringInfoString(result, sep); 1675 1676 if (dim + 1 == ndims) 1677 { 1678 datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory, 1679 outfuncoid, false); 1680 (*valcount)++; 1681 } 1682 else 1683 { 1684 /* 1685 * Do we want line feeds on inner dimensions of arrays? For now 1686 * we'll say no. 1687 */ 1688 array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls, 1689 valcount, tcategory, outfuncoid, false); 1690 } 1691 } 1692 1693 appendStringInfoChar(result, ']'); 1694 } 1695 1696 /* 1697 * Turn an array into JSON. 1698 */ 1699 static void 1700 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) 1701 { 1702 ArrayType *v = DatumGetArrayTypeP(array); 1703 Oid element_type = ARR_ELEMTYPE(v); 1704 int *dim; 1705 int ndim; 1706 int nitems; 1707 int count = 0; 1708 Datum *elements; 1709 bool *nulls; 1710 int16 typlen; 1711 bool typbyval; 1712 char typalign; 1713 JsonTypeCategory tcategory; 1714 Oid outfuncoid; 1715 1716 ndim = ARR_NDIM(v); 1717 dim = ARR_DIMS(v); 1718 nitems = ArrayGetNItems(ndim, dim); 1719 1720 if (nitems <= 0) 1721 { 1722 appendStringInfoString(result, "[]"); 1723 return; 1724 } 1725 1726 get_typlenbyvalalign(element_type, 1727 &typlen, &typbyval, &typalign); 1728 1729 json_categorize_type(element_type, 1730 &tcategory, &outfuncoid); 1731 1732 deconstruct_array(v, element_type, typlen, typbyval, 1733 typalign, &elements, &nulls, 1734 &nitems); 1735 1736 array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory, 1737 outfuncoid, use_line_feeds); 1738 1739 pfree(elements); 1740 pfree(nulls); 1741 } 1742 1743 /* 1744 * Turn a composite / record into JSON. 1745 */ 1746 static void 1747 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) 1748 { 1749 HeapTupleHeader td; 1750 Oid tupType; 1751 int32 tupTypmod; 1752 TupleDesc tupdesc; 1753 HeapTupleData tmptup, 1754 *tuple; 1755 int i; 1756 bool needsep = false; 1757 const char *sep; 1758 1759 sep = use_line_feeds ? ",\n " : ","; 1760 1761 td = DatumGetHeapTupleHeader(composite); 1762 1763 /* Extract rowtype info and find a tupdesc */ 1764 tupType = HeapTupleHeaderGetTypeId(td); 1765 tupTypmod = HeapTupleHeaderGetTypMod(td); 1766 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); 1767 1768 /* Build a temporary HeapTuple control structure */ 1769 tmptup.t_len = HeapTupleHeaderGetDatumLength(td); 1770 tmptup.t_data = td; 1771 tuple = &tmptup; 1772 1773 appendStringInfoChar(result, '{'); 1774 1775 for (i = 0; i < tupdesc->natts; i++) 1776 { 1777 Datum val; 1778 bool isnull; 1779 char *attname; 1780 JsonTypeCategory tcategory; 1781 Oid outfuncoid; 1782 Form_pg_attribute att = TupleDescAttr(tupdesc, i); 1783 1784 if (att->attisdropped) 1785 continue; 1786 1787 if (needsep) 1788 appendStringInfoString(result, sep); 1789 needsep = true; 1790 1791 attname = NameStr(att->attname); 1792 escape_json(result, attname); 1793 appendStringInfoChar(result, ':'); 1794 1795 val = heap_getattr(tuple, i + 1, tupdesc, &isnull); 1796 1797 if (isnull) 1798 { 1799 tcategory = JSONTYPE_NULL; 1800 outfuncoid = InvalidOid; 1801 } 1802 else 1803 json_categorize_type(att->atttypid, &tcategory, &outfuncoid); 1804 1805 datum_to_json(val, isnull, result, tcategory, outfuncoid, false); 1806 } 1807 1808 appendStringInfoChar(result, '}'); 1809 ReleaseTupleDesc(tupdesc); 1810 } 1811 1812 /* 1813 * Append JSON text for "val" to "result". 1814 * 1815 * This is just a thin wrapper around datum_to_json. If the same type will be 1816 * printed many times, avoid using this; better to do the json_categorize_type 1817 * lookups only once. 1818 */ 1819 static void 1820 add_json(Datum val, bool is_null, StringInfo result, 1821 Oid val_type, bool key_scalar) 1822 { 1823 JsonTypeCategory tcategory; 1824 Oid outfuncoid; 1825 1826 if (val_type == InvalidOid) 1827 ereport(ERROR, 1828 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 1829 errmsg("could not determine input data type"))); 1830 1831 if (is_null) 1832 { 1833 tcategory = JSONTYPE_NULL; 1834 outfuncoid = InvalidOid; 1835 } 1836 else 1837 json_categorize_type(val_type, 1838 &tcategory, &outfuncoid); 1839 1840 datum_to_json(val, is_null, result, tcategory, outfuncoid, key_scalar); 1841 } 1842 1843 /* 1844 * SQL function array_to_json(row) 1845 */ 1846 Datum 1847 array_to_json(PG_FUNCTION_ARGS) 1848 { 1849 Datum array = PG_GETARG_DATUM(0); 1850 StringInfo result; 1851 1852 result = makeStringInfo(); 1853 1854 array_to_json_internal(array, result, false); 1855 1856 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 1857 } 1858 1859 /* 1860 * SQL function array_to_json(row, prettybool) 1861 */ 1862 Datum 1863 array_to_json_pretty(PG_FUNCTION_ARGS) 1864 { 1865 Datum array = PG_GETARG_DATUM(0); 1866 bool use_line_feeds = PG_GETARG_BOOL(1); 1867 StringInfo result; 1868 1869 result = makeStringInfo(); 1870 1871 array_to_json_internal(array, result, use_line_feeds); 1872 1873 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 1874 } 1875 1876 /* 1877 * SQL function row_to_json(row) 1878 */ 1879 Datum 1880 row_to_json(PG_FUNCTION_ARGS) 1881 { 1882 Datum array = PG_GETARG_DATUM(0); 1883 StringInfo result; 1884 1885 result = makeStringInfo(); 1886 1887 composite_to_json(array, result, false); 1888 1889 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 1890 } 1891 1892 /* 1893 * SQL function row_to_json(row, prettybool) 1894 */ 1895 Datum 1896 row_to_json_pretty(PG_FUNCTION_ARGS) 1897 { 1898 Datum array = PG_GETARG_DATUM(0); 1899 bool use_line_feeds = PG_GETARG_BOOL(1); 1900 StringInfo result; 1901 1902 result = makeStringInfo(); 1903 1904 composite_to_json(array, result, use_line_feeds); 1905 1906 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 1907 } 1908 1909 /* 1910 * SQL function to_json(anyvalue) 1911 */ 1912 Datum 1913 to_json(PG_FUNCTION_ARGS) 1914 { 1915 Datum val = PG_GETARG_DATUM(0); 1916 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0); 1917 StringInfo result; 1918 JsonTypeCategory tcategory; 1919 Oid outfuncoid; 1920 1921 if (val_type == InvalidOid) 1922 ereport(ERROR, 1923 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 1924 errmsg("could not determine input data type"))); 1925 1926 json_categorize_type(val_type, 1927 &tcategory, &outfuncoid); 1928 1929 result = makeStringInfo(); 1930 1931 datum_to_json(val, false, result, tcategory, outfuncoid, false); 1932 1933 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 1934 } 1935 1936 /* 1937 * json_agg transition function 1938 * 1939 * aggregate input column as a json array value. 1940 */ 1941 Datum 1942 json_agg_transfn(PG_FUNCTION_ARGS) 1943 { 1944 MemoryContext aggcontext, 1945 oldcontext; 1946 JsonAggState *state; 1947 Datum val; 1948 1949 if (!AggCheckCallContext(fcinfo, &aggcontext)) 1950 { 1951 /* cannot be called directly because of internal-type argument */ 1952 elog(ERROR, "json_agg_transfn called in non-aggregate context"); 1953 } 1954 1955 if (PG_ARGISNULL(0)) 1956 { 1957 Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1); 1958 1959 if (arg_type == InvalidOid) 1960 ereport(ERROR, 1961 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 1962 errmsg("could not determine input data type"))); 1963 1964 /* 1965 * Make this state object in a context where it will persist for the 1966 * duration of the aggregate call. MemoryContextSwitchTo is only 1967 * needed the first time, as the StringInfo routines make sure they 1968 * use the right context to enlarge the object if necessary. 1969 */ 1970 oldcontext = MemoryContextSwitchTo(aggcontext); 1971 state = (JsonAggState *) palloc(sizeof(JsonAggState)); 1972 state->str = makeStringInfo(); 1973 MemoryContextSwitchTo(oldcontext); 1974 1975 appendStringInfoChar(state->str, '['); 1976 json_categorize_type(arg_type, &state->val_category, 1977 &state->val_output_func); 1978 } 1979 else 1980 { 1981 state = (JsonAggState *) PG_GETARG_POINTER(0); 1982 appendStringInfoString(state->str, ", "); 1983 } 1984 1985 /* fast path for NULLs */ 1986 if (PG_ARGISNULL(1)) 1987 { 1988 datum_to_json((Datum) 0, true, state->str, JSONTYPE_NULL, 1989 InvalidOid, false); 1990 PG_RETURN_POINTER(state); 1991 } 1992 1993 val = PG_GETARG_DATUM(1); 1994 1995 /* add some whitespace if structured type and not first item */ 1996 if (!PG_ARGISNULL(0) && 1997 (state->val_category == JSONTYPE_ARRAY || 1998 state->val_category == JSONTYPE_COMPOSITE)) 1999 { 2000 appendStringInfoString(state->str, "\n "); 2001 } 2002 2003 datum_to_json(val, false, state->str, state->val_category, 2004 state->val_output_func, false); 2005 2006 /* 2007 * The transition type for json_agg() is declared to be "internal", which 2008 * is a pass-by-value type the same size as a pointer. So we can safely 2009 * pass the JsonAggState pointer through nodeAgg.c's machinations. 2010 */ 2011 PG_RETURN_POINTER(state); 2012 } 2013 2014 /* 2015 * json_agg final function 2016 */ 2017 Datum 2018 json_agg_finalfn(PG_FUNCTION_ARGS) 2019 { 2020 JsonAggState *state; 2021 2022 /* cannot be called directly because of internal-type argument */ 2023 Assert(AggCheckCallContext(fcinfo, NULL)); 2024 2025 state = PG_ARGISNULL(0) ? 2026 NULL : 2027 (JsonAggState *) PG_GETARG_POINTER(0); 2028 2029 /* NULL result for no rows in, as is standard with aggregates */ 2030 if (state == NULL) 2031 PG_RETURN_NULL(); 2032 2033 /* Else return state with appropriate array terminator added */ 2034 PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]")); 2035 } 2036 2037 /* 2038 * json_object_agg transition function. 2039 * 2040 * aggregate two input columns as a single json object value. 2041 */ 2042 Datum 2043 json_object_agg_transfn(PG_FUNCTION_ARGS) 2044 { 2045 MemoryContext aggcontext, 2046 oldcontext; 2047 JsonAggState *state; 2048 Datum arg; 2049 2050 if (!AggCheckCallContext(fcinfo, &aggcontext)) 2051 { 2052 /* cannot be called directly because of internal-type argument */ 2053 elog(ERROR, "json_object_agg_transfn called in non-aggregate context"); 2054 } 2055 2056 if (PG_ARGISNULL(0)) 2057 { 2058 Oid arg_type; 2059 2060 /* 2061 * Make the StringInfo in a context where it will persist for the 2062 * duration of the aggregate call. Switching context is only needed 2063 * for this initial step, as the StringInfo routines make sure they 2064 * use the right context to enlarge the object if necessary. 2065 */ 2066 oldcontext = MemoryContextSwitchTo(aggcontext); 2067 state = (JsonAggState *) palloc(sizeof(JsonAggState)); 2068 state->str = makeStringInfo(); 2069 MemoryContextSwitchTo(oldcontext); 2070 2071 arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1); 2072 2073 if (arg_type == InvalidOid) 2074 ereport(ERROR, 2075 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 2076 errmsg("could not determine data type for argument %d", 1))); 2077 2078 json_categorize_type(arg_type, &state->key_category, 2079 &state->key_output_func); 2080 2081 arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2); 2082 2083 if (arg_type == InvalidOid) 2084 ereport(ERROR, 2085 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 2086 errmsg("could not determine data type for argument %d", 2))); 2087 2088 json_categorize_type(arg_type, &state->val_category, 2089 &state->val_output_func); 2090 2091 appendStringInfoString(state->str, "{ "); 2092 } 2093 else 2094 { 2095 state = (JsonAggState *) PG_GETARG_POINTER(0); 2096 appendStringInfoString(state->str, ", "); 2097 } 2098 2099 /* 2100 * Note: since json_object_agg() is declared as taking type "any", the 2101 * parser will not do any type conversion on unknown-type literals (that 2102 * is, undecorated strings or NULLs). Such values will arrive here as 2103 * type UNKNOWN, which fortunately does not matter to us, since 2104 * unknownout() works fine. 2105 */ 2106 2107 if (PG_ARGISNULL(1)) 2108 ereport(ERROR, 2109 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 2110 errmsg("field name must not be null"))); 2111 2112 arg = PG_GETARG_DATUM(1); 2113 2114 datum_to_json(arg, false, state->str, state->key_category, 2115 state->key_output_func, true); 2116 2117 appendStringInfoString(state->str, " : "); 2118 2119 if (PG_ARGISNULL(2)) 2120 arg = (Datum) 0; 2121 else 2122 arg = PG_GETARG_DATUM(2); 2123 2124 datum_to_json(arg, PG_ARGISNULL(2), state->str, state->val_category, 2125 state->val_output_func, false); 2126 2127 PG_RETURN_POINTER(state); 2128 } 2129 2130 /* 2131 * json_object_agg final function. 2132 */ 2133 Datum 2134 json_object_agg_finalfn(PG_FUNCTION_ARGS) 2135 { 2136 JsonAggState *state; 2137 2138 /* cannot be called directly because of internal-type argument */ 2139 Assert(AggCheckCallContext(fcinfo, NULL)); 2140 2141 state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0); 2142 2143 /* NULL result for no rows in, as is standard with aggregates */ 2144 if (state == NULL) 2145 PG_RETURN_NULL(); 2146 2147 /* Else return state with appropriate object terminator added */ 2148 PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }")); 2149 } 2150 2151 /* 2152 * Helper function for aggregates: return given StringInfo's contents plus 2153 * specified trailing string, as a text datum. We need this because aggregate 2154 * final functions are not allowed to modify the aggregate state. 2155 */ 2156 static text * 2157 catenate_stringinfo_string(StringInfo buffer, const char *addon) 2158 { 2159 /* custom version of cstring_to_text_with_len */ 2160 int buflen = buffer->len; 2161 int addlen = strlen(addon); 2162 text *result = (text *) palloc(buflen + addlen + VARHDRSZ); 2163 2164 SET_VARSIZE(result, buflen + addlen + VARHDRSZ); 2165 memcpy(VARDATA(result), buffer->data, buflen); 2166 memcpy(VARDATA(result) + buflen, addon, addlen); 2167 2168 return result; 2169 } 2170 2171 /* 2172 * SQL function json_build_object(variadic "any") 2173 */ 2174 Datum 2175 json_build_object(PG_FUNCTION_ARGS) 2176 { 2177 int nargs = PG_NARGS(); 2178 int i; 2179 const char *sep = ""; 2180 StringInfo result; 2181 Datum *args; 2182 bool *nulls; 2183 Oid *types; 2184 2185 /* fetch argument values to build the object */ 2186 nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls); 2187 2188 if (nargs < 0) 2189 PG_RETURN_NULL(); 2190 2191 if (nargs % 2 != 0) 2192 ereport(ERROR, 2193 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 2194 errmsg("argument list must have even number of elements"), 2195 /* translator: %s is a SQL function name */ 2196 errhint("The arguments of %s must consist of alternating keys and values.", 2197 "json_build_object()"))); 2198 2199 result = makeStringInfo(); 2200 2201 appendStringInfoChar(result, '{'); 2202 2203 for (i = 0; i < nargs; i += 2) 2204 { 2205 appendStringInfoString(result, sep); 2206 sep = ", "; 2207 2208 /* process key */ 2209 if (nulls[i]) 2210 ereport(ERROR, 2211 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 2212 errmsg("argument %d cannot be null", i + 1), 2213 errhint("Object keys should be text."))); 2214 2215 add_json(args[i], false, result, types[i], true); 2216 2217 appendStringInfoString(result, " : "); 2218 2219 /* process value */ 2220 add_json(args[i + 1], nulls[i + 1], result, types[i + 1], false); 2221 } 2222 2223 appendStringInfoChar(result, '}'); 2224 2225 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 2226 } 2227 2228 /* 2229 * degenerate case of json_build_object where it gets 0 arguments. 2230 */ 2231 Datum 2232 json_build_object_noargs(PG_FUNCTION_ARGS) 2233 { 2234 PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2)); 2235 } 2236 2237 /* 2238 * SQL function json_build_array(variadic "any") 2239 */ 2240 Datum 2241 json_build_array(PG_FUNCTION_ARGS) 2242 { 2243 int nargs; 2244 int i; 2245 const char *sep = ""; 2246 StringInfo result; 2247 Datum *args; 2248 bool *nulls; 2249 Oid *types; 2250 2251 /* fetch argument values to build the array */ 2252 nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls); 2253 2254 if (nargs < 0) 2255 PG_RETURN_NULL(); 2256 2257 result = makeStringInfo(); 2258 2259 appendStringInfoChar(result, '['); 2260 2261 for (i = 0; i < nargs; i++) 2262 { 2263 appendStringInfoString(result, sep); 2264 sep = ", "; 2265 add_json(args[i], nulls[i], result, types[i], false); 2266 } 2267 2268 appendStringInfoChar(result, ']'); 2269 2270 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len)); 2271 } 2272 2273 /* 2274 * degenerate case of json_build_array where it gets 0 arguments. 2275 */ 2276 Datum 2277 json_build_array_noargs(PG_FUNCTION_ARGS) 2278 { 2279 PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", 2)); 2280 } 2281 2282 /* 2283 * SQL function json_object(text[]) 2284 * 2285 * take a one or two dimensional array of text as key/value pairs 2286 * for a json object. 2287 */ 2288 Datum 2289 json_object(PG_FUNCTION_ARGS) 2290 { 2291 ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0); 2292 int ndims = ARR_NDIM(in_array); 2293 StringInfoData result; 2294 Datum *in_datums; 2295 bool *in_nulls; 2296 int in_count, 2297 count, 2298 i; 2299 text *rval; 2300 char *v; 2301 2302 switch (ndims) 2303 { 2304 case 0: 2305 PG_RETURN_DATUM(CStringGetTextDatum("{}")); 2306 break; 2307 2308 case 1: 2309 if ((ARR_DIMS(in_array)[0]) % 2) 2310 ereport(ERROR, 2311 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), 2312 errmsg("array must have even number of elements"))); 2313 break; 2314 2315 case 2: 2316 if ((ARR_DIMS(in_array)[1]) != 2) 2317 ereport(ERROR, 2318 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), 2319 errmsg("array must have two columns"))); 2320 break; 2321 2322 default: 2323 ereport(ERROR, 2324 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), 2325 errmsg("wrong number of array subscripts"))); 2326 } 2327 2328 deconstruct_array(in_array, 2329 TEXTOID, -1, false, 'i', 2330 &in_datums, &in_nulls, &in_count); 2331 2332 count = in_count / 2; 2333 2334 initStringInfo(&result); 2335 2336 appendStringInfoChar(&result, '{'); 2337 2338 for (i = 0; i < count; ++i) 2339 { 2340 if (in_nulls[i * 2]) 2341 ereport(ERROR, 2342 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), 2343 errmsg("null value not allowed for object key"))); 2344 2345 v = TextDatumGetCString(in_datums[i * 2]); 2346 if (i > 0) 2347 appendStringInfoString(&result, ", "); 2348 escape_json(&result, v); 2349 appendStringInfoString(&result, " : "); 2350 pfree(v); 2351 if (in_nulls[i * 2 + 1]) 2352 appendStringInfoString(&result, "null"); 2353 else 2354 { 2355 v = TextDatumGetCString(in_datums[i * 2 + 1]); 2356 escape_json(&result, v); 2357 pfree(v); 2358 } 2359 } 2360 2361 appendStringInfoChar(&result, '}'); 2362 2363 pfree(in_datums); 2364 pfree(in_nulls); 2365 2366 rval = cstring_to_text_with_len(result.data, result.len); 2367 pfree(result.data); 2368 2369 PG_RETURN_TEXT_P(rval); 2370 2371 } 2372 2373 /* 2374 * SQL function json_object(text[], text[]) 2375 * 2376 * take separate key and value arrays of text to construct a json object 2377 * pairwise. 2378 */ 2379 Datum 2380 json_object_two_arg(PG_FUNCTION_ARGS) 2381 { 2382 ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0); 2383 ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1); 2384 int nkdims = ARR_NDIM(key_array); 2385 int nvdims = ARR_NDIM(val_array); 2386 StringInfoData result; 2387 Datum *key_datums, 2388 *val_datums; 2389 bool *key_nulls, 2390 *val_nulls; 2391 int key_count, 2392 val_count, 2393 i; 2394 text *rval; 2395 char *v; 2396 2397 if (nkdims > 1 || nkdims != nvdims) 2398 ereport(ERROR, 2399 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), 2400 errmsg("wrong number of array subscripts"))); 2401 2402 if (nkdims == 0) 2403 PG_RETURN_DATUM(CStringGetTextDatum("{}")); 2404 2405 deconstruct_array(key_array, 2406 TEXTOID, -1, false, 'i', 2407 &key_datums, &key_nulls, &key_count); 2408 2409 deconstruct_array(val_array, 2410 TEXTOID, -1, false, 'i', 2411 &val_datums, &val_nulls, &val_count); 2412 2413 if (key_count != val_count) 2414 ereport(ERROR, 2415 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), 2416 errmsg("mismatched array dimensions"))); 2417 2418 initStringInfo(&result); 2419 2420 appendStringInfoChar(&result, '{'); 2421 2422 for (i = 0; i < key_count; ++i) 2423 { 2424 if (key_nulls[i]) 2425 ereport(ERROR, 2426 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), 2427 errmsg("null value not allowed for object key"))); 2428 2429 v = TextDatumGetCString(key_datums[i]); 2430 if (i > 0) 2431 appendStringInfoString(&result, ", "); 2432 escape_json(&result, v); 2433 appendStringInfoString(&result, " : "); 2434 pfree(v); 2435 if (val_nulls[i]) 2436 appendStringInfoString(&result, "null"); 2437 else 2438 { 2439 v = TextDatumGetCString(val_datums[i]); 2440 escape_json(&result, v); 2441 pfree(v); 2442 } 2443 } 2444 2445 appendStringInfoChar(&result, '}'); 2446 2447 pfree(key_datums); 2448 pfree(key_nulls); 2449 pfree(val_datums); 2450 pfree(val_nulls); 2451 2452 rval = cstring_to_text_with_len(result.data, result.len); 2453 pfree(result.data); 2454 2455 PG_RETURN_TEXT_P(rval); 2456 } 2457 2458 2459 /* 2460 * Produce a JSON string literal, properly escaping characters in the text. 2461 */ 2462 void 2463 escape_json(StringInfo buf, const char *str) 2464 { 2465 const char *p; 2466 2467 appendStringInfoCharMacro(buf, '"'); 2468 for (p = str; *p; p++) 2469 { 2470 switch (*p) 2471 { 2472 case '\b': 2473 appendStringInfoString(buf, "\\b"); 2474 break; 2475 case '\f': 2476 appendStringInfoString(buf, "\\f"); 2477 break; 2478 case '\n': 2479 appendStringInfoString(buf, "\\n"); 2480 break; 2481 case '\r': 2482 appendStringInfoString(buf, "\\r"); 2483 break; 2484 case '\t': 2485 appendStringInfoString(buf, "\\t"); 2486 break; 2487 case '"': 2488 appendStringInfoString(buf, "\\\""); 2489 break; 2490 case '\\': 2491 appendStringInfoString(buf, "\\\\"); 2492 break; 2493 default: 2494 if ((unsigned char) *p < ' ') 2495 appendStringInfo(buf, "\\u%04x", (int) *p); 2496 else 2497 appendStringInfoCharMacro(buf, *p); 2498 break; 2499 } 2500 } 2501 appendStringInfoCharMacro(buf, '"'); 2502 } 2503 2504 /* 2505 * SQL function json_typeof(json) -> text 2506 * 2507 * Returns the type of the outermost JSON value as TEXT. Possible types are 2508 * "object", "array", "string", "number", "boolean", and "null". 2509 * 2510 * Performs a single call to json_lex() to get the first token of the supplied 2511 * value. This initial token uniquely determines the value's type. As our 2512 * input must already have been validated by json_in() or json_recv(), the 2513 * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END, 2514 * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END. 2515 */ 2516 Datum 2517 json_typeof(PG_FUNCTION_ARGS) 2518 { 2519 text *json; 2520 2521 JsonLexContext *lex; 2522 JsonTokenType tok; 2523 char *type; 2524 2525 json = PG_GETARG_TEXT_PP(0); 2526 lex = makeJsonLexContext(json, false); 2527 2528 /* Lex exactly one token from the input and check its type. */ 2529 json_lex(lex); 2530 tok = lex_peek(lex); 2531 switch (tok) 2532 { 2533 case JSON_TOKEN_OBJECT_START: 2534 type = "object"; 2535 break; 2536 case JSON_TOKEN_ARRAY_START: 2537 type = "array"; 2538 break; 2539 case JSON_TOKEN_STRING: 2540 type = "string"; 2541 break; 2542 case JSON_TOKEN_NUMBER: 2543 type = "number"; 2544 break; 2545 case JSON_TOKEN_TRUE: 2546 case JSON_TOKEN_FALSE: 2547 type = "boolean"; 2548 break; 2549 case JSON_TOKEN_NULL: 2550 type = "null"; 2551 break; 2552 default: 2553 elog(ERROR, "unexpected json token: %d", tok); 2554 } 2555 2556 PG_RETURN_TEXT_P(cstring_to_text(type)); 2557 } 2558