1# This file contains a collection of tests for tclEncoding.c 2# Sourcing this file into Tcl runs the tests and generates output for errors. 3# No output means no errors were found. 4# 5# Copyright © 1997 Sun Microsystems, Inc. 6# Copyright © 1998-1999 Scriptics Corporation. 7# 8# See the file "license.terms" for information on usage and redistribution of 9# this file, and for a DISCLAIMER OF ALL WARRANTIES. 10 11if {"::tcltest" ni [namespace children]} { 12 package require tcltest 2.5 13 namespace import -force ::tcltest::* 14} 15 16 17namespace eval ::tcl::test::encoding { 18 variable x 19 20catch { 21 ::tcltest::loadTestedCommands 22 package require -exact tcl::test [info patchlevel] 23} 24 25proc toutf {args} { 26 variable x 27 lappend x "toutf $args" 28} 29proc fromutf {args} { 30 variable x 31 lappend x "fromutf $args" 32} 33 34proc runtests {} { 35 variable x 36 37# Some tests require the testencoding command 38testConstraint testencoding [llength [info commands testencoding]] 39testConstraint testbytestring [llength [info commands testbytestring]] 40testConstraint teststringbytes [llength [info commands teststringbytes]] 41testConstraint exec [llength [info commands exec]] 42testConstraint testgetencpath [llength [info commands testgetencpath]] 43 44# TclInitEncodingSubsystem is tested by the rest of this file 45# TclFinalizeEncodingSubsystem is not currently tested 46 47test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup { 48 set old [encoding system] 49} -constraints {testencoding} -body { 50 testencoding create foo [namespace origin toutf] [namespace origin fromutf] 51 encoding system foo 52 set x {} 53 encoding convertto abcd 54 return $x 55} -cleanup { 56 encoding system $old 57 testencoding delete foo 58} -result {{fromutf }} 59test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} { 60 testencoding create foo [namespace origin toutf] [namespace origin fromutf] 61 set x {} 62 encoding convertto foo abcd 63 testencoding delete foo 64 return $x 65} {{fromutf }} 66test encoding-1.3 {Tcl_GetEncoding: load encoding} { 67 list [encoding convertto jis0208 乎] \ 68 [encoding convertfrom jis0208 8C] 69} "8C 乎" 70 71test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} { 72 encoding convertto jis0208 乎 73} {8C} 74test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup { 75 set system [encoding system] 76 set path [encoding dirs] 77} -constraints {testencoding} -body { 78 encoding system shiftjis ;# incr ref count 79 encoding dirs [list [pwd]] 80 set x [encoding convertto shiftjis 乎] ;# old one found 81 encoding system iso8859-1 82 llength shiftjis ;# Shimmer away any cache of Tcl_Encoding 83 lappend x [catch {encoding convertto shiftjis 乎} msg] $msg 84} -cleanup { 85 encoding system iso8859-1 86 encoding dirs $path 87 encoding system $system 88} -result "\x8C\xC1 1 {unknown encoding \"shiftjis\"}" 89 90test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup { 91 set old [encoding system] 92} -body { 93 encoding system shiftjis 94 encoding system 95} -cleanup { 96 encoding system $old 97} -result {shiftjis} 98test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { 99 set old [fconfigure stdout -encoding] 100} -body { 101 fconfigure stdout -encoding jis0208 102 fconfigure stdout -encoding 103} -cleanup { 104 fconfigure stdout -encoding $old 105} -result {jis0208} 106 107test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { 108 cd [makeDirectory tmp] 109 makeDirectory [file join tmp encoding] 110 set path [encoding dirs] 111 encoding dirs {} 112 catch {unset encodings} 113 catch {unset x} 114} -body { 115 foreach encoding [encoding names] { 116 set encodings($encoding) 1 117 } 118 makeFile {} [file join tmp encoding junk.enc] 119 makeFile {} [file join tmp encoding junk2.enc] 120 encoding dirs [list [file join [pwd] encoding]] 121 foreach encoding [encoding names] { 122 if {![info exists encodings($encoding)]} { 123 lappend x $encoding 124 } 125 } 126 lsort $x 127} -cleanup { 128 encoding dirs $path 129 cd [workingDirectory] 130 removeFile [file join tmp encoding junk2.enc] 131 removeFile [file join tmp encoding junk.enc] 132 removeDirectory [file join tmp encoding] 133 removeDirectory tmp 134} -result {junk junk2} 135 136test encoding-5.1 {Tcl_SetSystemEncoding} -setup { 137 set old [encoding system] 138} -body { 139 encoding system jis0208 140 encoding convertto 乎 141} -cleanup { 142 encoding system iso8859-1 143 encoding system $old 144} -result {8C} 145test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} { 146 set old [encoding system] 147 encoding system $old 148 string compare $old [encoding system] 149} {0} 150 151test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} { 152 testencoding create foo [namespace code {toutf 1}] \ 153 [namespace code {fromutf 2}] 154 set x {} 155 encoding convertfrom foo abcd 156 encoding convertto foo abcd 157 testencoding delete foo 158 return $x 159} {{toutf 1} {fromutf 2}} 160test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { 161 testencoding create foo [namespace code {toutf a}] \ 162 [namespace code {fromutf b}] 163 set x {} 164 encoding convertfrom foo abcd 165 encoding convertto foo abcd 166 testencoding delete foo 167 return $x 168} {{toutf a} {fromutf b}} 169 170test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { 171 encoding convertfrom jis0208 8c8c8c8c 172} "吾吾吾吾" 173test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { 174 set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C 175 append a $a 176 append a $a 177 append a $a 178 append a $a 179 set x [encoding convertfrom jis0208 $a] 180 list [string length $x] [string index $x 0] 181} "512 乎" 182 183test encoding-8.1 {Tcl_ExternalToUtf} { 184 set f [open [file join [temporaryDirectory] dummy] w] 185 fconfigure $f -translation binary -encoding iso8859-1 186 puts -nonewline $f "ab\x8C\xC1g" 187 close $f 188 set f [open [file join [temporaryDirectory] dummy] r] 189 fconfigure $f -translation binary -encoding shiftjis 190 set x [read $f] 191 close $f 192 file delete [file join [temporaryDirectory] dummy] 193 return $x 194} "ab乎g" 195 196test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { 197 encoding convertto jis0208 "吾吾吾吾" 198} {8c8c8c8c} 199test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { 200 set a 乎乎乎乎乎乎乎乎 201 append a $a 202 append a $a 203 append a $a 204 append a $a 205 append a $a 206 append a $a 207 set x [encoding convertto jis0208 $a] 208 list [string length $x] [string range $x 0 1] 209} "1024 8C" 210 211test encoding-10.1 {Tcl_UtfToExternal} { 212 set f [open [file join [temporaryDirectory] dummy] w] 213 fconfigure $f -translation binary -encoding shiftjis 214 puts -nonewline $f "ab乎g" 215 close $f 216 set f [open [file join [temporaryDirectory] dummy] r] 217 fconfigure $f -translation binary -encoding iso8859-1 218 set x [read $f] 219 close $f 220 file delete [file join [temporaryDirectory] dummy] 221 return $x 222} "ab\x8C\xC1g" 223 224proc viewable {str} { 225 set res "" 226 foreach c [split $str {}] { 227 if {[string is print $c] && [string is ascii $c]} { 228 append res $c 229 } else { 230 append res "\\u[format %4.4X [scan $c %c]]" 231 } 232 } 233 return "$str ($res)" 234} 235 236test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { 237 set system [encoding system] 238 set path [encoding dirs] 239 encoding system iso8859-1 240 encoding dirs {} 241 llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal 242 set x [list [catch {encoding convertto jis0208 乎} msg] $msg] 243 encoding dirs $path 244 encoding system $system 245 lappend x [encoding convertto jis0208 乎] 246} {1 {unknown encoding "jis0208"} 8C} 247test encoding-11.2 {LoadEncodingFile: single-byte} { 248 encoding convertfrom jis0201 \xA1 249} "。" 250test encoding-11.3 {LoadEncodingFile: double-byte} { 251 encoding convertfrom jis0208 8C 252} 乎 253test encoding-11.4 {LoadEncodingFile: multi-byte} { 254 encoding convertfrom shiftjis \x8C\xC1 255} 乎 256test encoding-11.5 {LoadEncodingFile: escape file} { 257 viewable [encoding convertto iso2022 乎] 258} [viewable "\x1B\$B8C\x1B(B"] 259test encoding-11.5.1 {LoadEncodingFile: escape file} { 260 viewable [encoding convertto iso2022-jp 乎] 261} [viewable "\x1B\$B8C\x1B(B"] 262test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup { 263 set system [encoding system] 264 set path [encoding dirs] 265 encoding system iso8859-1 266} -body { 267 cd [temporaryDirectory] 268 encoding dirs [file join tmp encoding] 269 makeDirectory tmp 270 makeDirectory [file join tmp encoding] 271 set f [open [file join tmp encoding splat.enc] w] 272 fconfigure $f -translation binary 273 puts $f "abcdefghijklmnop" 274 close $f 275 encoding convertto splat 乎 276} -returnCodes error -cleanup { 277 file delete [file join [temporaryDirectory] tmp encoding splat.enc] 278 removeDirectory [file join tmp encoding] 279 removeDirectory tmp 280 cd [workingDirectory] 281 encoding dirs $path 282 encoding system $system 283} -result {invalid encoding file "splat"} 284test encoding-11.8 {encoding: extended Unicode UTF-16} { 285 viewable [encoding convertto utf-16le ] 286} {=Ø9Þ (=\u00D89\u00DE)} 287test encoding-11.9 {encoding: extended Unicode UTF-16} { 288 viewable [encoding convertto utf-16be ] 289} {Ø=Þ9 (\u00D8=\u00DE9)} 290# OpenEncodingFile is fully tested by the rest of the tests in this file. 291 292test encoding-12.1 {LoadTableEncoding: normal encoding} { 293 set x [encoding convertto iso8859-3 Ġ] 294 append x [encoding convertto iso8859-3 Õ] 295 append x [encoding convertfrom iso8859-3 Õ] 296} "Õ?Ġ" 297test encoding-12.2 {LoadTableEncoding: single-byte encoding} { 298 set x [encoding convertto iso8859-3 abĠg] 299 append x [encoding convertfrom iso8859-3 abÕg] 300} "abÕgabĠg" 301test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { 302 set x [encoding convertto shiftjis ab乎g] 303 append x [encoding convertfrom shiftjis ab\x8C\xC1g] 304} "ab\x8C\xC1gab乎g" 305test encoding-12.4 {LoadTableEncoding: double-byte encoding} { 306 set x [encoding convertto jis0208 乎α] 307 append x [encoding convertfrom jis0208 8C&A] 308} "8C&A乎α" 309test encoding-12.5 {LoadTableEncoding: symbol encoding} { 310 set x [encoding convertto symbol γ] 311 append x [encoding convertto symbol g] 312 append x [encoding convertfrom symbol g] 313} "ggγ" 314 315test encoding-13.1 {LoadEscapeTable} { 316 viewable [set x [encoding convertto iso2022 ab乎棙g]] 317} [viewable "ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg"] 318 319test encoding-15.1 {UtfToUtfProc} { 320 encoding convertto utf-8 £ 321} "\xC2\xA3" 322test encoding-15.2 {UtfToUtfProc null character output} testbytestring { 323 binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z 324 set z 325} 00 326test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { 327 set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]] 328 binary scan [teststringbytes $y] H* z 329 set z 330} c080 331test encoding-15.4 {UtfToUtfProc emoji character input} -body { 332 set x \xED\xA0\xBD\xED\xB8\x82 333 set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] 334 list [string length $x] $y 335} -result "6 " 336test encoding-15.5 {UtfToUtfProc emoji character input} { 337 set x \xF0\x9F\x98\x82 338 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] 339 list [string length $x] $y 340} "4 " 341test encoding-15.6 {UtfToUtfProc emoji character output} { 342 set x \uDE02\uD83D\uDE02\uD83D 343 set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] 344 binary scan $y H* z 345 list [string length $y] $z 346} {10 efbfbdf09f9882efbfbd} 347test encoding-15.7 {UtfToUtfProc emoji character output} { 348 set x \uDE02\uD83D\uD83D 349 set y [encoding convertto utf-8 \uDE02\uD83D\uD83D] 350 binary scan $y H* z 351 list [string length $x] [string length $y] $z 352} {3 9 efbfbdefbfbdefbfbd} 353test encoding-15.8 {UtfToUtfProc emoji character output} { 354 set x \uDE02\uD83Dé 355 set y [encoding convertto utf-8 \uDE02\uD83Dé] 356 binary scan $y H* z 357 list [string length $x] [string length $y] $z 358} {3 8 efbfbdefbfbdc3a9} 359test encoding-15.9 {UtfToUtfProc emoji character output} { 360 set x \uDE02\uD83DX 361 set y [encoding convertto utf-8 \uDE02\uD83DX] 362 binary scan $y H* z 363 list [string length $x] [string length $y] $z 364} {3 7 efbfbdefbfbd58} 365test encoding-15.10 {UtfToUtfProc high surrogate character output} { 366 set x \uDE02é 367 set y [encoding convertto utf-8 \uDE02é] 368 binary scan $y H* z 369 list [string length $x] [string length $y] $z 370} {2 5 efbfbdc3a9} 371test encoding-15.11 {UtfToUtfProc low surrogate character output} { 372 set x \uDA02é 373 set y [encoding convertto utf-8 \uDA02é] 374 binary scan $y H* z 375 list [string length $x] [string length $y] $z 376} {2 5 efbfbdc3a9} 377test encoding-15.12 {UtfToUtfProc high surrogate character output} { 378 set x \uDE02Y 379 set y [encoding convertto utf-8 \uDE02Y] 380 binary scan $y H* z 381 list [string length $x] [string length $y] $z 382} {2 4 efbfbd59} 383test encoding-15.13 {UtfToUtfProc low surrogate character output} { 384 set x \uDA02Y 385 set y [encoding convertto utf-8 \uDA02Y] 386 binary scan $y H* z 387 list [string length $x] [string length $y] $z 388} {2 4 efbfbd59} 389test encoding-15.14 {UtfToUtfProc high surrogate character output} { 390 set x \uDE02 391 set y [encoding convertto utf-8 \uDE02] 392 binary scan $y H* z 393 list [string length $x] [string length $y] $z 394} {1 3 efbfbd} 395test encoding-15.15 {UtfToUtfProc low surrogate character output} { 396 set x \uDA02 397 set y [encoding convertto utf-8 \uDA02] 398 binary scan $y H* z 399 list [string length $x] [string length $y] $z 400} {1 3 efbfbd} 401test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { 402 set x \xF0\xA0\xA1\xC2 403 set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2] 404 list [string length $x] $y 405} "4 \xF0\xA0\xA1\xC2" 406test encoding-15.17 {UtfToUtfProc emoji character output} { 407 set x 408 set y [encoding convertto utf-8 ] 409 binary scan $y H* z 410 list [string length $y] $z 411} {4 f09f9882} 412test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} { 413 set y [encoding convertto cesu-8 \U10000] 414 binary scan $y H* z 415 list [string length $y] $z 416} {6 eda080edb080} 417test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} { 418 set y [encoding convertto cesu-8 \uD800] 419 binary scan $y H* z 420 list [string length $y] $z 421} {3 eda080} 422test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} { 423 set y [encoding convertto cesu-8 \uDC00] 424 binary scan $y H* z 425 list [string length $y] $z 426} {3 edb080} 427test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} { 428 set y [encoding convertto cesu-8 \uFFFF] 429 binary scan $y H* z 430 list [string length $y] $z 431} {3 efbfbf} 432 433test encoding-16.1 {Utf16ToUtfProc} -body { 434 set val [encoding convertfrom utf-16 NN] 435 list $val [format %x [scan $val %c]] 436} -result "乎 4e4e" 437test encoding-16.2 {Utf16ToUtfProc} -body { 438 set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"] 439 list $val [format %x [scan $val %c]] 440} -result "\U460DC 460dc" 441test encoding-16.3 {Utf16ToUtfProc} -body { 442 set val [encoding convertfrom utf-16 "\xDC\xDC"] 443 list $val [format %x [scan $val %c]] 444} -result "\uDCDC dcdc" 445test encoding-16.4 {Ucs2ToUtfProc} -body { 446 set val [encoding convertfrom ucs-2 NN] 447 list $val [format %x [scan $val %c]] 448} -result "乎 4e4e" 449test encoding-16.4 {Ucs2ToUtfProc} -body { 450 set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"] 451 list $val [format %x [scan $val %c]] 452} -result "\U460DC 460dc" 453 454test encoding-17.1 {UtfToUtf16Proc} -body { 455 encoding convertto utf-16 "\U460DC" 456} -result "\xD8\xD8\xDC\xDC" 457test encoding-17.2 {UtfToUcs2Proc} -body { 458 encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] 459} -result "\uFFFD" 460test encoding-17.3 {UtfToUtf16Proc} -body { 461 encoding convertto utf-16be "\uDCDC" 462} -result "\xFF\xFD" 463test encoding-17.4 {UtfToUtf16Proc} -body { 464 encoding convertto utf-16le "\uD8D8" 465} -result "\xFD\xFF" 466 467test encoding-18.1 {TableToUtfProc} { 468} {} 469 470test encoding-19.1 {TableFromUtfProc} { 471} {} 472 473test encoding-20.1 {TableFreefProc} { 474} {} 475 476test encoding-21.1 {EscapeToUtfProc} { 477} {} 478 479test encoding-22.1 {EscapeFromUtfProc} { 480} {} 481 482set iso2022encData "\x1B\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\x1B(B 483\x1B\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\x1B(B 484\x1B\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\x1B(B 485casino_japanese@___.com \x1B\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\x1B(B 486\x1B\$B\$7\$g\$&\$+!)\x1B(B" 487 488set iso2022uniData [encoding convertfrom iso2022-jp $iso2022encData] 489set iso2022uniData2 "私どもでは、チップご購入時にご登録いただいたご住所をキャッシュアウトの際の 490小切手送付先として使用しております。恐れ入りますが、正しい住所をご登録しなお 491お願いいたします。また、大変恐縮ですが、住所変更のあと、日本語サービス部( 492casino_japanese@___.com )までご住所変更済の連絡をいただけないで 493しょうか?" 494 495cd [temporaryDirectory] 496set fid [open iso2022.txt w] 497fconfigure $fid -encoding binary 498puts -nonewline $fid $iso2022encData 499close $fid 500 501test encoding-23.1 {iso2022-jp escape encoding test} { 502 string equal $iso2022uniData $iso2022uniData2 503} 1 504test encoding-23.2 {iso2022-jp escape encoding test} { 505 # This checks that 'gets' isn't resetting the encoding inappropriately. 506 # [Bug #523988] 507 set fid [open iso2022.txt r] 508 fconfigure $fid -encoding iso2022-jp 509 set out "" 510 set count 0 511 while {[set num [gets $fid line]] >= 0} { 512 if {$count} { 513 incr count 1 ; # account for newline 514 append out \n 515 } 516 append out $line 517 incr count $num 518 } 519 close $fid 520 if {[string compare $iso2022uniData $out]} { 521 return -code error "iso2022-jp read in doesn't match original" 522 } 523 list $count $out 524} [list [string length $iso2022uniData] $iso2022uniData] 525test encoding-23.3 {iso2022-jp escape encoding test} { 526 # read $fis <size> reads size in chars, not raw bytes. 527 set fid [open iso2022.txt r] 528 fconfigure $fid -encoding iso2022-jp 529 set data [read $fid 50] 530 close $fid 531 return $data 532} [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50 533cd [workingDirectory] 534 535# Code to make the next few tests more intelligible; the code being tested 536# should be in the body of the test! 537proc runInSubprocess {contents {filename iso2022.tcl}} { 538 set theFile [makeFile $contents $filename] 539 try { 540 exec [interpreter] $theFile 541 } finally { 542 removeFile $theFile 543 } 544} 545 546test encoding-24.1 {EscapeFreeProc on open channels} exec { 547 runInSubprocess { 548 set f [open [file join [file dirname [info script]] iso2022.txt]] 549 fconfigure $f -encoding iso2022-jp 550 gets $f 551 } 552} {} 553test encoding-24.2 {EscapeFreeProc on open channels} {exec} { 554 # Bug #524674 output 555 viewable [runInSubprocess { 556 encoding system cp1252; # Bug #2891556 crash revelator 557 fconfigure stdout -encoding iso2022-jp 558 puts ab乎棙g 559 set env(TCL_FINALIZE_ON_EXIT) 1 560 exit 561 }] 562} "ab\x1B\$B8C\x1B\$(DD%\x1B(Bg (ab\\u001B\$B8C\\u001B\$(DD%\\u001B(Bg)" 563test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { 564 # Bug #219314 - if we don't free escape encodings correctly on channel 565 # closure, we go boom 566 set file [makeFile { 567 encoding system iso2022-jp 568 set a "乎乞也"; # 3 Japanese Kanji letters 569 puts $a 570 } iso2022.tcl] 571 set f [open "|[list [interpreter] $file]"] 572 fconfigure $f -encoding iso2022-jp 573 set count [gets $f line] 574 close $f 575 removeFile iso2022.tcl 576 list $count [viewable $line] 577} [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"] 578 579test encoding-24.4 {Parse valid or invalid utf-8} { 580 string length [encoding convertfrom utf-8 "\xC0\x80"] 581} 1 582test encoding-24.5 {Parse valid or invalid utf-8} { 583 string length [encoding convertfrom utf-8 "\xC0\x81"] 584} 2 585test encoding-24.6 {Parse valid or invalid utf-8} { 586 string length [encoding convertfrom utf-8 "\xC1\xBF"] 587} 2 588test encoding-24.7 {Parse valid or invalid utf-8} { 589 string length [encoding convertfrom utf-8 "\xC2\x80"] 590} 1 591test encoding-24.8 {Parse valid or invalid utf-8} { 592 string length [encoding convertfrom utf-8 "\xE0\x80\x80"] 593} 3 594test encoding-24.9 {Parse valid or invalid utf-8} { 595 string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"] 596} 3 597test encoding-24.10 {Parse valid or invalid utf-8} { 598 string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] 599} 1 600test encoding-24.11 {Parse valid or invalid utf-8} { 601 string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"] 602} 1 603 604file delete [file join [temporaryDirectory] iso2022.txt] 605 606# 607# Begin jajp encoding round-trip conformity tests 608# 609proc foreach-jisx0208 {varName command} { 610 upvar 1 $varName code 611 foreach range { 612 {2121 217E} 613 {2221 222E} 614 {223A 2241} 615 {224A 2250} 616 {225C 226A} 617 {2272 2279} 618 {227E 227E} 619 {2330 2339} 620 {2421 2473} 621 {2521 2576} 622 {2821 2821} 623 {282C 282C} 624 {2837 2837} 625 626 {30 21 4E 7E} 627 {4F21 4F53} 628 629 {50 21 73 7E} 630 {7421 7426} 631 } { 632 if {[llength $range] == 2} { 633 # for adhoc range. simple {first last}. inclusive. 634 scan $range %x%x first last 635 for {set i $first} {$i <= $last} {incr i} { 636 set code $i 637 uplevel 1 $command 638 } 639 } elseif {[llength $range] == 4} { 640 # for uniform range. 641 scan $range %x%x%x%x h0 l0 hend lend 642 for {set hi $h0} {$hi <= $hend} {incr hi} { 643 for {set lo $l0} {$lo <= $lend} {incr lo} { 644 set code [expr {$hi << 8 | ($lo & 0xff)}] 645 uplevel 1 $command 646 } 647 } 648 } else { 649 error "really?" 650 } 651 } 652} 653proc gen-jisx0208-euc-jp {code} { 654 binary format cc \ 655 [expr {($code >> 8) | 0x80}] [expr {($code & 0xFF) | 0x80}] 656} 657proc gen-jisx0208-iso2022-jp {code} { 658 binary format a3cca3 \ 659 "\x1B\$B" [expr {$code >> 8}] [expr {$code & 0xFF}] "\x1B(B" 660} 661proc gen-jisx0208-cp932 {code} { 662 set c1 [expr {($code >> 8) | 0x80}] 663 set c2 [expr {($code & 0xff)| 0x80}] 664 if {$c1 % 2} { 665 set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x31 : 0x71)}] 666 incr c2 [expr {- (0x60 + ($c2 < 0xE0))}] 667 } else { 668 set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x30 : 0x70)}] 669 incr c2 -2 670 } 671 binary format cc $c1 $c2 672} 673proc channel-diff {fa fb} { 674 set diff {} 675 while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} { 676 if {[string compare $la $lb] == 0} continue 677 # lappend diff $la $lb 678 679 # For more readable (easy to analyze) output. 680 set code [lindex $la 0] 681 binary scan [lindex $la 1] H* expected 682 binary scan [lindex $lb 1] H* got 683 lappend diff [list $code $expected $got] 684 } 685 return $diff 686} 687 688# Create char tables. 689cd [temporaryDirectory] 690foreach enc {cp932 euc-jp iso2022-jp} { 691 set f [open $enc.chars w] 692 fconfigure $f -encoding binary 693 foreach-jisx0208 code { 694 puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]] 695 } 696 close $f 697} 698# shiftjis == cp932 for jisx0208. 699file copy -force cp932.chars shiftjis.chars 700 701set NUM 0 702foreach from {cp932 shiftjis euc-jp iso2022-jp} { 703 foreach to {cp932 shiftjis euc-jp iso2022-jp} { 704 test encoding-25.[incr NUM] "jisx0208 $from => $to" -setup { 705 cd [temporaryDirectory] 706 } -body { 707 set f [open $from.chars] 708 fconfigure $f -encoding $from 709 set out [open $from.$to.tcltestout w] 710 fconfigure $out -encoding $to 711 puts -nonewline $out [read $f] 712 close $out 713 close $f 714 # then compare $to.chars <=> $from.to.tcltestout as binary. 715 set fa [open $to.chars rb] 716 set fb [open $from.$to.tcltestout rb] 717 channel-diff $fa $fb 718 # Difference should be empty. 719 } -cleanup { 720 close $fa 721 close $fb 722 } -result {} 723 } 724} 725 726test encoding-26.0 {Tcl_GetEncodingSearchPath} -constraints { 727 testgetencpath 728} -setup { 729 set origPath [testgetencpath] 730 testsetencpath slappy 731} -body { 732 testgetencpath 733} -cleanup { 734 testsetencpath $origPath 735} -result slappy 736 737file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout] 738# ===> Cut here <=== 739 740# EscapeFreeProc, GetTableEncoding, unilen are fully tested by the rest of 741# this file. 742 743 744test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body { 745 encoding dirs ? ? 746} -result {wrong # args: should be "encoding dirs ?dirList?"} 747test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body { 748 encoding dirs "\{not a list" 749} -result "expected directory list but got \"\{not a list\"" 750 751} 752 753 754test encoding-28.0 {all encodings load} -body { 755 set string hello 756 foreach name [encoding names] { 757 incr count 758 encoding convertto $name $string 759 760 # discard the cached internal representation of Tcl_Encoding 761 # Unfortunately, without this, encoding 2-1 fails. 762 llength $name 763 } 764 return $count 765} -result [expr {[info exists ::tcl_precision] ? 87 : 86}] 766 767runtests 768 769} 770 771# cleanup 772namespace delete ::tcl::test::encoding 773::tcltest::cleanupTests 774return 775 776# Local Variables: 777# mode: tcl 778# End: 779