1# Test unicode strings in key and value contexts 2 3%TestML 0.1.0 4 5### 6# These tests target unicode characters that are handled specially or known to be 7# problematic. Test YNY (YAML→Native→YAML) and NYN roundtripping. 8# 9# YAML scalar emission does quoting based on first character, presence of 10# escape characters, and special ambiguous cases like ': '. These single 11# character strings go a long way towards making sure an implementation is 12# correct. 13### 14 15 16# Make a mapping { "$code" : "$code" } where code is a unicode code point: 17 18# Dump mapping matches *yaml 19*code.dump_code_key_value == *yaml 20 21# Load *yaml then dump matches *yaml 22*yaml.load_yaml.dump_yaml == *yaml 23 24# Dump mapping then load memory-matches mapping 25*code.code_key_value.dump_yaml.load_yaml === *code.code_key_value 26 27 28# 0 → \0 "null" 29# 30# \z is the other YAML "null" encoding. Most implementations (including 31# libyaml) seem to go with \0 when emitting. 32=== Code point 0 33--- code: 0 34--- yaml 35"\0": "\0" 36 37 38# 1-6,14-26,28-31 → \x## 39=== Code point 1 40--- code: 1 41--- yaml 42"\x01": "\x01" 43 44 45# 7 → \a "bell" (alarm) 46=== Code point 7 47--- code: 7 48--- yaml 49"\a": "\a" 50 51 52# 8 → \b "backspace" 53=== Code point 8 54--- code: 8 55--- yaml 56"\b": "\b" 57 58 59# 9 → \t "horizontal tab" 60=== Code point 9 61--- code: 9 62--- yaml 63"\t": "\t" 64 65 66# 10 → \n "linefeed" (newline) 67=== Code point 10 68--- code: 10 69--- yaml 70"\n": "\n" 71 72 73# 11 → \v "vertical tab" 74=== Code point 11 75--- code: 11 76--- yaml 77"\v": "\v" 78 79 80# 12 → \f "form feed" 81=== Code point 12 82--- code: 12 83--- yaml 84"\f": "\f" 85 86 87# 13 → \r "carriage return" 88=== Code point 13 89--- code: 13 90--- yaml 91"\r": "\r" 92 93# 27 → \e "escape" 94=== Code point 27 95--- code: 27 96--- yaml 97"\e": "\e" 98 99 100# Space character needs quotes. 101=== Code point 32 102--- code: 32 103--- yaml 104' ': ' ' 105 106 107# ! is a tag indicator. Needs quotes. 
108=== Code point 33 109--- code: 33 110--- yaml 111'!': '!' 112 113 114# Quote double quotes with single quotes. 115=== Code point 34 116--- code: 34 117--- yaml 118'"': '"' 119 120 121# '#' is comment character. Needs quotes. 122=== Code point 35 123--- code: 35 124--- yaml 125'#': '#' 126 127 128# $ has no special meaning. No quotes. 129=== Code point 36 130--- code: 36 131--- yaml 132$: $ 133 134 135# % is directive indicator. Needs quotes. 136=== Code point 37 137--- code: 37 138--- yaml 139'%': '%' 140 141 142# & is anchor indicator. Needs quotes. 143=== Code point 38 144--- code: 38 145--- yaml 146'&': '&' 147 148 149# Quote single quotes with double quotes. 150=== Code point 39 151--- code: 39 152--- yaml 153"'": "'" 154 155 156# ( has no special meaning. No quotes. 157=== Code point 40 158--- code: 40 159--- yaml 160(: ( 161 162 163# ) has no special meaning. No quotes. 164=== Code point 41 165--- code: 41 166--- yaml 167): ) 168 169 170# * is an alias indicator. Needs quotes. 171=== Code point 42 172--- code: 42 173--- yaml 174'*': '*' 175 176 177# + has no special meaning. No quotes. 178=== Code point 43 179--- code: 43 180--- yaml 181+: + 182 183 184# , is a list separator. Needs quotes. 185=== Code point 44 186--- code: 44 187--- yaml 188',': ',' 189 190 191# - is a sequence element marker. In many contexts it is not ambiguous when 192# unquoted, but in others it is ambiguous. libyaml always quotes it so going 193# with that for now. 194=== Code point 45 195--- code: 45 196--- yaml 197'-': '-' 198 199 200# . has no special meaning. No quotes. 201=== Code point 46 202--- code: 46 203--- yaml 204.: . 205 206 207# / has no special meaning. No quotes. 208=== Code point 47 209--- code: 47 210--- yaml 211/: / 212 213 214# 48-57 → 0-9 "digits" 215# These values are strings, so must quote them. 216=== Code point 48 217--- code: 48 218--- yaml 219'0': '0' 220 221 222# : is a key/value separator. 
It is not always ambiguous when not quoted, but 223# libyaml always quotes it at start of a string. Probably wise. Going with that 224# for now. 225=== Code point 58 226--- code: 58 227--- yaml 228':': ':' 229 230 231# ; has no special meaning. No quotes. 232=== Code point 59 233--- code: 59 234--- yaml 235;: ; 236 237 238# < has no special meaning. No quotes. 239=== Code point 60 240--- code: 60 241--- yaml 242<: < 243 244 245# = has no special meaning. No quotes. 246=== Code point 61 247--- code: 61 248--- yaml 249=: = 250 251 252# > is a folded scalar indicator. Needs quotes. 253=== Code point 62 254--- code: 62 255--- yaml 256'>': '>' 257 258 259# ? is a mapping key indicator. Needs quotes. 260=== Code point 63 261--- code: 63 262--- yaml 263'?': '?' 264 265 266# @ is a reserved character. Needs quotes. 267# TODO Check spec on this. 268=== Code point 64 269--- code: 64 270--- yaml 271'@': '@' 272 273 274# 65-90 → A-Z "upper case letters". No quotes. 275=== Code point 65 276--- code: 65 277--- yaml 278A: A 279 280 281# Some implementations think N means false. This should not be the case in a 282# default schema. No quotes. 283# 284# NOTE: 285# http://yaml.org/type/bool.html suggests that many simple strings should be 286# loaded as boolean, but this is an outdated concept. Currently, only the 287# words true/false/null (lower case) should be loaded specially (not as 288# strings). This may become even more restrictive in the future. i.e. only 289# true/false/null in a flow context. 290=== Code point 78 291--- code: 78 292--- yaml 293N: N 294 295 296# Some implementations think Y means true. This should not be the case in a 297# default schema. No quotes. 298=== Code point 89 299--- code: 89 300--- yaml 301Y: Y 302 303 304# [ is a flow sequence start indicator. Needs quotes. 305=== Code point 91 306--- code: 91 307--- yaml 308'[': '[' 309 310 311# \ is an escape indicator in double quoted strings. Used on its own it has no 312# special meaning. No quotes. 
313=== Code point 92 314--- SKIP 315--- code: 92 316--- yaml 317\: \ 318 319 320# ] is a flow sequence end indicator. Needs quotes. 321=== Code point 93 322--- code: 93 323--- yaml 324']': ']' 325 326 327# ^ has no special meaning. No quotes. 328=== Code point 94 329--- code: 94 330--- yaml 331^: ^ 332 333 334# _ has no special meaning. No quotes. 335=== Code point 95 336--- code: 95 337--- yaml 338_: _ 339 340 341# ` is a reserved character. Needs quotes. 342=== Code point 96 343--- code: 96 344--- yaml 345'`': '`' 346 347 348# 97-122 → a-z "lower case letters". No quotes. 349=== Code point 97 350--- code: 97 351--- yaml 352a: a 353 354 355# Some implementations think n means false. This should not be the case in a 356# default schema. No quotes. 357=== Code point 110 358--- code: 110 359--- yaml 360n: n 361 362 363# Some implementations think y means true. This should not be the case in a 364# default schema. No quotes. 365=== Code point 121 366--- code: 121 367--- yaml 368y: y 369 370 371# { is a flow mapping start indicator. Needs quotes. 372=== Code point 123 373--- code: 123 374--- yaml 375'{': '{' 376 377 378# | is a literal scalar indicator. Needs quotes. 379=== Code point 124 380--- code: 124 381--- yaml 382'|': '|' 383 384 385# } is a flow mapping end indicator. Needs quotes. 386=== Code point 125 387--- code: 125 388--- yaml 389'}': '}' 390 391 392# A single ~ has long been used as a plain scalar representation of null. This 393# should be deprecated, but may take a while. 394=== Code point 126 395--- code: 126 396--- yaml 397'~': '~' 398--- unquoted 399~: ~ 400 401 402# 127 → "delete" (DEL) 403# YAML does not have a special escape for it. YAML2 should consider \?. 
404=== Code point 127 405--- code: 127 406--- yaml 407"\x7F": "\x7F" 408 409 410# 128-132,134-159 → \x## 411=== Code point 128 412--- code: 128 413--- yaml 414"\x80": "\x80" 415 416 417# 133 (\x85) → "next line" (NEL) 418=== Code point 133 419--- code: 133 420--- yaml 421"\N": "\N" 422 423 424# 160 (\xA0) → "non-breaking space" 425# It seems extremely odd that YAML does not escape this. 426# Investigate further. 427=== Code point 160 428--- SKIP 429--- code: 160 430--- yaml 431 : 432 433 434# 161-… → From here on up use printable unicode chars. 435# XXX Need to look into other special code blocks. Especially those known to 436# libyaml. 437=== Code point 161 438--- code: 161 439--- yaml 440¡: ¡ 441