1--
2-- encoding-sensitive tests for json and jsonb
3--
4-- We provide expected-results files for UTF8 (json_encoding.out)
5-- and for SQL_ASCII (json_encoding_1.out).  Skip otherwise.
6SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
7       AS skip_test \gset
8\if :skip_test
9\quit
10\endif
11SELECT getdatabaseencoding();           -- just to label the results files
12 getdatabaseencoding
13---------------------
14 UTF8
15(1 row)
16
17-- first json
18-- basic unicode input
19SELECT '"\u"'::json;			-- ERROR, incomplete escape
20ERROR:  invalid input syntax for type json
21LINE 1: SELECT '"\u"'::json;
22               ^
23DETAIL:  "\u" must be followed by four hexadecimal digits.
24CONTEXT:  JSON data, line 1: "\u"
25SELECT '"\u00"'::json;			-- ERROR, incomplete escape
26ERROR:  invalid input syntax for type json
27LINE 1: SELECT '"\u00"'::json;
28               ^
29DETAIL:  "\u" must be followed by four hexadecimal digits.
30CONTEXT:  JSON data, line 1: "\u00"
31SELECT '"\u000g"'::json;		-- ERROR, g is not a hex digit
32ERROR:  invalid input syntax for type json
33LINE 1: SELECT '"\u000g"'::json;
34               ^
35DETAIL:  "\u" must be followed by four hexadecimal digits.
36CONTEXT:  JSON data, line 1: "\u000g...
37SELECT '"\u0000"'::json;		-- OK, legal escape
38   json
39----------
40 "\u0000"
41(1 row)
42
43SELECT '"\uaBcD"'::json;		-- OK, uppercase and lower case both OK
44   json
45----------
46 "\uaBcD"
47(1 row)
48
49-- handling of unicode surrogate pairs
50select json '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
51      correct_in_utf8
52----------------------------
53 "\ud83d\ude04\ud83d\udc36"
54(1 row)
55
56select json '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
57ERROR:  invalid input syntax for type json
58DETAIL:  Unicode high surrogate must not follow a high surrogate.
59CONTEXT:  JSON data, line 1: { "a":...
60select json '{ "a":  "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
61ERROR:  invalid input syntax for type json
62DETAIL:  Unicode low surrogate must follow a high surrogate.
63CONTEXT:  JSON data, line 1: { "a":...
64select json '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
65ERROR:  invalid input syntax for type json
66DETAIL:  Unicode low surrogate must follow a high surrogate.
67CONTEXT:  JSON data, line 1: { "a":...
68select json '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
69ERROR:  invalid input syntax for type json
70DETAIL:  Unicode low surrogate must follow a high surrogate.
71CONTEXT:  JSON data, line 1: { "a":...
72--handling of simple unicode escapes
73select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
74            correct_in_utf8
75---------------------------------------
76 { "a":  "the Copyright \u00a9 sign" }
77(1 row)
78
79select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
80         correct_everywhere
81-------------------------------------
82 { "a":  "dollar \u0024 character" }
83(1 row)
84
85select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
86            not_an_escape
87--------------------------------------
88 { "a":  "dollar \\u0024 character" }
89(1 row)
90
91select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
92         not_unescaped
93--------------------------------
94 { "a":  "null \u0000 escape" }
95(1 row)
96
97select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
98          not_an_escape
99---------------------------------
100 { "a":  "null \\u0000 escape" }
101(1 row)
102
103select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
104   correct_in_utf8
105----------------------
106 the Copyright © sign
107(1 row)
108
109select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
110 correct_everywhere
111--------------------
112 dollar $ character
113(1 row)
114
115select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
116      not_an_escape
117-------------------------
118 dollar \u0024 character
119(1 row)
120
121select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
122ERROR:  unsupported Unicode escape sequence
123DETAIL:  \u0000 cannot be converted to text.
124CONTEXT:  JSON data, line 1: { "a":...
125select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
126   not_an_escape
127--------------------
128 null \u0000 escape
129(1 row)
130
131-- then jsonb
132-- basic unicode input
133SELECT '"\u"'::jsonb;			-- ERROR, incomplete escape
134ERROR:  invalid input syntax for type json
135LINE 1: SELECT '"\u"'::jsonb;
136               ^
137DETAIL:  "\u" must be followed by four hexadecimal digits.
138CONTEXT:  JSON data, line 1: "\u"
139SELECT '"\u00"'::jsonb;			-- ERROR, incomplete escape
140ERROR:  invalid input syntax for type json
141LINE 1: SELECT '"\u00"'::jsonb;
142               ^
143DETAIL:  "\u" must be followed by four hexadecimal digits.
144CONTEXT:  JSON data, line 1: "\u00"
145SELECT '"\u000g"'::jsonb;		-- ERROR, g is not a hex digit
146ERROR:  invalid input syntax for type json
147LINE 1: SELECT '"\u000g"'::jsonb;
148               ^
149DETAIL:  "\u" must be followed by four hexadecimal digits.
150CONTEXT:  JSON data, line 1: "\u000g...
151SELECT '"\u0045"'::jsonb;		-- OK, legal escape
152 jsonb
153-------
154 "E"
155(1 row)
156
157SELECT '"\u0000"'::jsonb;		-- ERROR, we don't support U+0000
158ERROR:  unsupported Unicode escape sequence
159LINE 1: SELECT '"\u0000"'::jsonb;
160               ^
161DETAIL:  \u0000 cannot be converted to text.
162CONTEXT:  JSON data, line 1: ...
163-- use octet_length here so we don't get an odd unicode char in the
164-- output
165SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
166 octet_length
167--------------
168            5
169(1 row)
170
171-- handling of unicode surrogate pairs
172SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
173 correct_in_utf8
174-----------------
175              10
176(1 row)
177
178SELECT jsonb '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
179ERROR:  invalid input syntax for type json
180LINE 1: SELECT jsonb '{ "a":  "\ud83d\ud83d" }' -> 'a';
181                     ^
182DETAIL:  Unicode high surrogate must not follow a high surrogate.
183CONTEXT:  JSON data, line 1: { "a":...
184SELECT jsonb '{ "a":  "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
185ERROR:  invalid input syntax for type json
186LINE 1: SELECT jsonb '{ "a":  "\ude04\ud83d" }' -> 'a';
187                     ^
188DETAIL:  Unicode low surrogate must follow a high surrogate.
189CONTEXT:  JSON data, line 1: { "a":...
190SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
191ERROR:  invalid input syntax for type json
192LINE 1: SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a';
193                     ^
194DETAIL:  Unicode low surrogate must follow a high surrogate.
195CONTEXT:  JSON data, line 1: { "a":...
196SELECT jsonb '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
197ERROR:  invalid input syntax for type json
198LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
199                     ^
200DETAIL:  Unicode low surrogate must follow a high surrogate.
201CONTEXT:  JSON data, line 1: { "a":...
202-- handling of simple unicode escapes
203SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
204        correct_in_utf8
205-------------------------------
206 {"a": "the Copyright © sign"}
207(1 row)
208
209SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
210     correct_everywhere
211-----------------------------
212 {"a": "dollar $ character"}
213(1 row)
214
215SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
216           not_an_escape
217-----------------------------------
218 {"a": "dollar \\u0024 character"}
219(1 row)
220
221SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
222ERROR:  unsupported Unicode escape sequence
223LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
224                     ^
225DETAIL:  \u0000 cannot be converted to text.
226CONTEXT:  JSON data, line 1: { "a":...
227SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
228        not_an_escape
229------------------------------
230 {"a": "null \\u0000 escape"}
231(1 row)
232
233SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
234   correct_in_utf8
235----------------------
236 the Copyright © sign
237(1 row)
238
239SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
240 correct_everywhere
241--------------------
242 dollar $ character
243(1 row)
244
245SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
246      not_an_escape
247-------------------------
248 dollar \u0024 character
249(1 row)
250
251SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
252ERROR:  unsupported Unicode escape sequence
253LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
254                     ^
255DETAIL:  \u0000 cannot be converted to text.
256CONTEXT:  JSON data, line 1: { "a":...
257SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
258   not_an_escape
259--------------------
260 null \u0000 escape
261(1 row)
262
263