--
-- Encoding-sensitive checks for the json and jsonb types.
-- Several statements below are expected to raise an error; the trailing
-- comment on each one states the intended outcome.
--

-- ====================================================================
-- Part 1: json
-- ====================================================================

-- Parsing of \u escapes in a bare string value.
SELECT '"\u"'::json;      -- ERROR: escape is incomplete
SELECT '"\u00"'::json;    -- ERROR: escape is incomplete
SELECT '"\u000g"'::json;  -- ERROR: "g" is not a hex digit
SELECT '"\u0000"'::json;  -- OK: escape is legal
SELECT '"\uaBcD"'::json;  -- OK: hex digits may be upper or lower case

-- Surrogate pairs: two valid pairs back to back, followed by four
-- invalid arrangements.
SELECT json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' AS correct_in_utf8;
SELECT json '{ "a": "\ud83d\ud83d" }' -> 'a';  -- high surrogate followed by a second high surrogate
SELECT json '{ "a": "\ude04\ud83d" }' -> 'a';  -- low surrogate placed before the high one
SELECT json '{ "a": "\ud83dX" }' -> 'a';       -- high surrogate with no partner
SELECT json '{ "a": "\ude04X" }' -> 'a';       -- low surrogate with no partner

-- Simple escapes, returning the whole document.  The column alias names
-- the expected outcome of each statement.
SELECT json '{ "a": "the Copyright \u00a9 sign" }' AS correct_in_utf8;
SELECT json '{ "a": "dollar \u0024 character" }' AS correct_everywhere;
SELECT json '{ "a": "dollar \\u0024 character" }' AS not_an_escape;
SELECT json '{ "a": "null \u0000 escape" }' AS not_unescaped;
SELECT json '{ "a": "null \\u0000 escape" }' AS not_an_escape;

-- The same five documents, this time extracting member "a" as text.
SELECT json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
SELECT json '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everywhere;
SELECT json '{ "a": "dollar \\u0024 character" }' ->> 'a' AS not_an_escape;
SELECT json '{ "a": "null \u0000 escape" }' ->> 'a' AS fails;
SELECT json '{ "a": "null \\u0000 escape" }' ->> 'a' AS not_an_escape;

-- ====================================================================
-- Part 2: jsonb
-- ====================================================================

-- Parsing of \u escapes in a bare string value.
SELECT '"\u"'::jsonb;      -- ERROR: escape is incomplete
SELECT '"\u00"'::jsonb;    -- ERROR: escape is incomplete
SELECT '"\u000g"'::jsonb;  -- ERROR: "g" is not a hex digit
SELECT '"\u0045"'::jsonb;  -- OK: escape is legal
SELECT '"\u0000"'::jsonb;  -- ERROR: U+0000 is not supported
-- Report octet_length rather than the value itself, so that no unusual
-- Unicode character ends up in the output.
SELECT octet_length('"\uaBcD"'::jsonb::text);  -- OK: hex digits may be upper or lower case

-- Surrogate pairs: two valid pairs back to back (reported by length
-- only), followed by four invalid arrangements.
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';  -- high surrogate followed by a second high surrogate
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';  -- low surrogate placed before the high one
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';       -- high surrogate with no partner
SELECT jsonb '{ "a": "\ude04X" }' -> 'a';       -- low surrogate with no partner

-- Simple escapes, returning the whole document.  The column alias names
-- the expected outcome of each statement.
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' AS correct_in_utf8;
SELECT jsonb '{ "a": "dollar \u0024 character" }' AS correct_everywhere;
SELECT jsonb '{ "a": "dollar \\u0024 character" }' AS not_an_escape;
SELECT jsonb '{ "a": "null \u0000 escape" }' AS fails;
SELECT jsonb '{ "a": "null \\u0000 escape" }' AS not_an_escape;

-- The same five documents, this time extracting member "a" as text.
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everywhere;
SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' AS not_an_escape;
SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS fails;
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' AS not_an_escape;