1-- 2-- encoding-sensitive tests for json and jsonb 3-- 4-- We provide expected-results files for UTF8 (json_encoding.out) 5-- and for SQL_ASCII (json_encoding_1.out). Skip otherwise. 6SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') 7 AS skip_test \gset 8\if :skip_test 9\quit 10\endif 11SELECT getdatabaseencoding(); -- just to label the results files 12 getdatabaseencoding 13--------------------- 14 UTF8 15(1 row) 16 17-- first json 18-- basic unicode input 19SELECT '"\u"'::json; -- ERROR, incomplete escape 20ERROR: invalid input syntax for type json 21LINE 1: SELECT '"\u"'::json; 22 ^ 23DETAIL: "\u" must be followed by four hexadecimal digits. 24CONTEXT: JSON data, line 1: "\u" 25SELECT '"\u00"'::json; -- ERROR, incomplete escape 26ERROR: invalid input syntax for type json 27LINE 1: SELECT '"\u00"'::json; 28 ^ 29DETAIL: "\u" must be followed by four hexadecimal digits. 30CONTEXT: JSON data, line 1: "\u00" 31SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit 32ERROR: invalid input syntax for type json 33LINE 1: SELECT '"\u000g"'::json; 34 ^ 35DETAIL: "\u" must be followed by four hexadecimal digits. 36CONTEXT: JSON data, line 1: "\u000g... 37SELECT '"\u0000"'::json; -- OK, legal escape 38 json 39---------- 40 "\u0000" 41(1 row) 42 43SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK 44 json 45---------- 46 "\uaBcD" 47(1 row) 48 49-- handling of unicode surrogate pairs 50select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; 51 correct_in_utf8 52---------------------------- 53 "\ud83d\ude04\ud83d\udc36" 54(1 row) 55 56select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row 57ERROR: invalid input syntax for type json 58DETAIL: Unicode high surrogate must not follow a high surrogate. 59CONTEXT: JSON data, line 1: { "a":... 60select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order 61ERROR: invalid input syntax for type json 62DETAIL: Unicode low surrogate must follow a high surrogate. 63CONTEXT: JSON data, line 1: { "a":... 64select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate 65ERROR: invalid input syntax for type json 66DETAIL: Unicode low surrogate must follow a high surrogate. 67CONTEXT: JSON data, line 1: { "a":... 68select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate 69ERROR: invalid input syntax for type json 70DETAIL: Unicode low surrogate must follow a high surrogate. 71CONTEXT: JSON data, line 1: { "a":... 72--handling of simple unicode escapes 73select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; 74 correct_in_utf8 75--------------------------------------- 76 { "a": "the Copyright \u00a9 sign" } 77(1 row) 78 79select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; 80 correct_everywhere 81------------------------------------- 82 { "a": "dollar \u0024 character" } 83(1 row) 84 85select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; 86 not_an_escape 87-------------------------------------- 88 { "a": "dollar \\u0024 character" } 89(1 row) 90 91select json '{ "a": "null \u0000 escape" }' as not_unescaped; 92 not_unescaped 93-------------------------------- 94 { "a": "null \u0000 escape" } 95(1 row) 96 97select json '{ "a": "null \\u0000 escape" }' as not_an_escape; 98 not_an_escape 99--------------------------------- 100 { "a": "null \\u0000 escape" } 101(1 row) 102 103select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; 104 correct_in_utf8 105---------------------- 106 the Copyright © sign 107(1 row) 108 109select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; 110 correct_everywhere 111-------------------- 112 dollar $ character 113(1 row) 114 115select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; 116 not_an_escape 117------------------------- 118 dollar \u0024 character 119(1 row) 120 121select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; 122ERROR: unsupported Unicode escape sequence 123DETAIL: \u0000 cannot be converted to text. 124CONTEXT: JSON data, line 1: { "a":... 125select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; 126 not_an_escape 127-------------------- 128 null \u0000 escape 129(1 row) 130 131-- then jsonb 132-- basic unicode input 133SELECT '"\u"'::jsonb; -- ERROR, incomplete escape 134ERROR: invalid input syntax for type json 135LINE 1: SELECT '"\u"'::jsonb; 136 ^ 137DETAIL: "\u" must be followed by four hexadecimal digits. 138CONTEXT: JSON data, line 1: "\u" 139SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape 140ERROR: invalid input syntax for type json 141LINE 1: SELECT '"\u00"'::jsonb; 142 ^ 143DETAIL: "\u" must be followed by four hexadecimal digits. 144CONTEXT: JSON data, line 1: "\u00" 145SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit 146ERROR: invalid input syntax for type json 147LINE 1: SELECT '"\u000g"'::jsonb; 148 ^ 149DETAIL: "\u" must be followed by four hexadecimal digits. 150CONTEXT: JSON data, line 1: "\u000g... 151SELECT '"\u0045"'::jsonb; -- OK, legal escape 152 jsonb 153------- 154 "E" 155(1 row) 156 157SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 158ERROR: unsupported Unicode escape sequence 159LINE 1: SELECT '"\u0000"'::jsonb; 160 ^ 161DETAIL: \u0000 cannot be converted to text. 162CONTEXT: JSON data, line 1: ... 163-- use octet_length here so we don't get an odd unicode char in the 164-- output 165SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK 166 octet_length 167-------------- 168 5 169(1 row) 170 171-- handling of unicode surrogate pairs 172SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8; 173 correct_in_utf8 174----------------- 175 10 176(1 row) 177 178SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row 179ERROR: invalid input syntax for type json 180LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; 181 ^ 182DETAIL: Unicode high surrogate must not follow a high surrogate. 183CONTEXT: JSON data, line 1: { "a":... 184SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order 185ERROR: invalid input syntax for type json 186LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; 187 ^ 188DETAIL: Unicode low surrogate must follow a high surrogate. 189CONTEXT: JSON data, line 1: { "a":... 190SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate 191ERROR: invalid input syntax for type json 192LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; 193 ^ 194DETAIL: Unicode low surrogate must follow a high surrogate. 195CONTEXT: JSON data, line 1: { "a":... 196SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate 197ERROR: invalid input syntax for type json 198LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; 199 ^ 200DETAIL: Unicode low surrogate must follow a high surrogate. 201CONTEXT: JSON data, line 1: { "a":... 202-- handling of simple unicode escapes 203SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; 204 correct_in_utf8 205------------------------------- 206 {"a": "the Copyright © sign"} 207(1 row) 208 209SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; 210 correct_everywhere 211----------------------------- 212 {"a": "dollar $ character"} 213(1 row) 214 215SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; 216 not_an_escape 217----------------------------------- 218 {"a": "dollar \\u0024 character"} 219(1 row) 220 221SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; 222ERROR: unsupported Unicode escape sequence 223LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; 224 ^ 225DETAIL: \u0000 cannot be converted to text. 226CONTEXT: JSON data, line 1: { "a":... 227SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; 228 not_an_escape 229------------------------------ 230 {"a": "null \\u0000 escape"} 231(1 row) 232 233SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; 234 correct_in_utf8 235---------------------- 236 the Copyright © sign 237(1 row) 238 239SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; 240 correct_everywhere 241-------------------- 242 dollar $ character 243(1 row) 244 245SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; 246 not_an_escape 247------------------------- 248 dollar \u0024 character 249(1 row) 250 251SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; 252ERROR: unsupported Unicode escape sequence 253LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... 254 ^ 255DETAIL: \u0000 cannot be converted to text. 256CONTEXT: JSON data, line 1: { "a":... 257SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; 258 not_an_escape 259-------------------- 260 null \u0000 escape 261(1 row) 262 263