1# Test unicode strings in key and value contexts
2
3%TestML 0.1.0
4
5###
6# These tests target unicode characters that are handled specially or known to
7# be problematic. Test YNY (YAML→Native→YAML) and NYN roundtripping.
8#
9# YAML scalar emission does quoting based on first character, presence of
10# escape characters, and special ambiguous cases like ': '. These
11# one-character strings go a long way towards making sure an implementation is
12# correct.
13###
14
15
16# Make a mapping { "$code" : "$code" } where code is a unicode code point:
17
18# Dump mapping matches *yaml
19*code.dump_code_key_value == *yaml
20
21# Load *yaml then dump matches *yaml
22*yaml.load_yaml.dump_yaml == *yaml
23
24# Dump mapping then load memory-matches mapping
25*code.code_key_value.dump_yaml.load_yaml === *code.code_key_value
26
27
28# 0 → \0 "null"
29#
30# \z is the other YAML "null" encoding. Most implementations (including
31# libyaml), seem to go with \0 when emitting.
32=== Code point 0
33--- code: 0
34--- yaml
35"\0": "\0"
36
37
38# 1-6,14-26,28-31 → \x##
39=== Code point 1
40--- code: 1
41--- yaml
42"\x01": "\x01"
43
44
45# 7 → \a "bell" (alarm)
46=== Code point 7
47--- code: 7
48--- yaml
49"\a": "\a"
50
51
52# 8 → \b "backspace"
53=== Code point 8
54--- code: 8
55--- yaml
56"\b": "\b"
57
58
59# 9 → \t "horizontal tab"
60=== Code point 9
61--- code: 9
62--- yaml
63"\t": "\t"
64
65
66# 10 → \n "linefeed" (newline)
67=== Code point 10
68--- code: 10
69--- yaml
70"\n": "\n"
71
72
73# 11 → \v "vertical tab"
74=== Code point 11
75--- code: 11
76--- yaml
77"\v": "\v"
78
79
80# 12 → \f "form feed"
81=== Code point 12
82--- code: 12
83--- yaml
84"\f": "\f"
85
86
87# 13 → \r "carriage return"
88=== Code point 13
89--- code: 13
90--- yaml
91"\r": "\r"
92
93# 27 → \e "escape"
94=== Code point 27
95--- code: 27
96--- yaml
97"\e": "\e"
98
99
100# Space character needs quotes.
101=== Code point 32
102--- code: 32
103--- yaml
104' ': ' '
105
106
107# ! is a tag indicator. Needs quotes.
108=== Code point 33
109--- code: 33
110--- yaml
111'!': '!'
112
113
114# Quote double quotes with single quotes.
115=== Code point 34
116--- code: 34
117--- yaml
118'"': '"'
119
120
121# '#' is comment character. Needs quotes.
122=== Code point 35
123--- code: 35
124--- yaml
125'#': '#'
126
127
128# $ has no special meaning. No quotes.
129=== Code point 36
130--- code: 36
131--- yaml
132$: $
133
134
135# % is directive indicator. Needs quotes.
136=== Code point 37
137--- code: 37
138--- yaml
139'%': '%'
140
141
142# & is anchor indicator. Needs quotes.
143=== Code point 38
144--- code: 38
145--- yaml
146'&': '&'
147
148
149# Quote single quotes with double quotes.
150=== Code point 39
151--- code: 39
152--- yaml
153"'": "'"
154
155
156# ( has no special meaning. No quotes.
157=== Code point 40
158--- code: 40
159--- yaml
160(: (
161
162
163# ) has no special meaning. No quotes.
164=== Code point 41
165--- code: 41
166--- yaml
167): )
168
169
170# * is an alias indicator. Needs quotes.
171=== Code point 42
172--- code: 42
173--- yaml
174'*': '*'
175
176
177# + has no special meaning. No quotes.
178=== Code point 43
179--- code: 43
180--- yaml
181+: +
182
183
184# , is a list separator. Needs quotes.
185=== Code point 44
186--- code: 44
187--- yaml
188',': ','
189
190
191# - is a sequence element marker. In many contexts it is not ambiguous when
192# unquoted, but in others it is ambiguous. libyaml always quotes it so going
193# with that for now.
194=== Code point 45
195--- code: 45
196--- yaml
197'-': '-'
198
199
200# . has no special meaning. No quotes.
201=== Code point 46
202--- code: 46
203--- yaml
204.: .
205
206
207# / has no special meaning. No quotes.
208=== Code point 47
209--- code: 47
210--- yaml
211/: /
212
213
214# 48-57 → 0-9 "digits"
215# These values are strings, so must quote them.
216=== Code point 48
217--- code: 48
218--- yaml
219'0': '0'
220
221
222# : is a key/value separator. It is not always ambiguous when not quoted, but
223# libyaml always quotes it at start of a string. Probably wise. Going with that
224# for now.
225=== Code point 58
226--- code: 58
227--- yaml
228':': ':'
229
230
231# ; has no special meaning. No quotes.
232=== Code point 59
233--- code: 59
234--- yaml
235;: ;
236
237
238# < has no special meaning. No quotes.
239=== Code point 60
240--- code: 60
241--- yaml
242<: <
243
244
245# = has no special meaning. No quotes.
246=== Code point 61
247--- code: 61
248--- yaml
249=: =
250
251
252# > is a folded scalar indicator. Needs quotes.
253=== Code point 62
254--- code: 62
255--- yaml
256'>': '>'
257
258
259# ? is a mapping key indicator. Needs quotes.
260=== Code point 63
261--- code: 63
262--- yaml
263'?': '?'
264
265
266# @ is a reserved character. Needs quotes.
267# TODO Check spec on this.
268=== Code point 64
269--- code: 64
270--- yaml
271'@': '@'
272
273
274# 65-90 → A-Z "upper case letters". No quotes.
275=== Code point 65
276--- code: 65
277--- yaml
278A: A
279
280
281# Some implementations think N means false. This should not be the case in a
282# default schema. No quotes.
283#
284# NOTE:
285#   http://yaml.org/type/bool.html suggests that many simple strings should be
286#   loaded as boolean, but this is an outdated concept. Currently, only the
287#   words true/false/null (lower case) should be loaded specially (not as
288#   strings).  This may become even more restrictive in the future. ie Only
289#   true/false/null in a flow context.
290=== Code point 78
291--- code: 78
292--- yaml
293N: N
294
295
296# Some implementations think Y means true. This should not be the case in a
297# default schema. No quotes.
298=== Code point 89
299--- code: 89
300--- yaml
301Y: Y
302
303
304# [ is a flow sequence start indicator. Needs quotes.
305=== Code point 91
306--- code: 91
307--- yaml
308'[': '['
309
310
311# \ is an escape indicator in double quoted strings. Used on its own it has no
312# special meaning. No quotes.
313=== Code point 92
314--- SKIP
315--- code: 92
316--- yaml
317\: \
318
319
320# ] is a flow sequence end indicator. Needs quotes.
321=== Code point 93
322--- code: 93
323--- yaml
324']': ']'
325
326
327# ^ has no special meaning. No quotes.
328=== Code point 94
329--- code: 94
330--- yaml
331^: ^
332
333
334# _ has no special meaning. No quotes.
335=== Code point 95
336--- code: 95
337--- yaml
338_: _
339
340
341# ` is a reserved character. Needs quotes.
342=== Code point 96
343--- code: 96
344--- yaml
345'`': '`'
346
347
348# 97-122 → a-z "lower case letters". No quotes.
349=== Code point 97
350--- code: 97
351--- yaml
352a: a
353
354
355# Some implementations think n means false. This should not be the case in a
356# default schema. No quotes.
357=== Code point 110
358--- code: 110
359--- yaml
360n: n
361
362
363# Some implementations think y means true. This should not be the case in a
364# default schema. No quotes.
365=== Code point 121
366--- code: 121
367--- yaml
368y: y
369
370
371# { is a flow mapping start indicator. Needs quotes.
372=== Code point 123
373--- code: 123
374--- yaml
375'{': '{'
376
377
378# | is a literal scalar indicator. Needs quotes.
379=== Code point 124
380--- code: 124
381--- yaml
382'|': '|'
383
384
385# } is a flow mapping end indicator. Needs quotes.
386=== Code point 125
387--- code: 125
388--- yaml
389'}': '}'
390
391
392# A single ~ has long been used as a plain scalar representation of null. This
393# should be deprecated, but may take a while.
394=== Code point 126
395--- code: 126
396--- yaml
397'~': '~'
398--- unquoted
399~: ~
400
401
402# 127 → "delete" (DEL)
403# YAML does not have a special character. YAML2 should consider \?.
404=== Code point 127
405--- code: 127
406--- yaml
407"\x7F": "\x7F"
408
409
410# 128-132,134-159 → \x##
411=== Code point 128
412--- code: 128
413--- yaml
414"\x80": "\x80"
415
416
417# 133 (\x85) → "next line" (NEL)
418=== Code point 133
419--- code: 133
420--- yaml
421"\N": "\N"
422
423
424# 160 (\xA0) → "non-breaking space"
425# It seems extremely odd that YAML does not escape this.
426# Investigate further.
427=== Code point 160
428--- SKIP
429--- code: 160
430--- yaml
431 :  
432
433
434# 161-… → From here on up use printable unicode chars.
435# XXX Need to look into other special code blocks. Especially those known to
436# libyaml.
437=== Code point 161
438--- code: 161
439--- yaml
440¡: ¡
441