1# This file contains a collection of tests for tclEncoding.c
2# Sourcing this file into Tcl runs the tests and generates output for errors.
3# No output means no errors were found.
4#
5# Copyright © 1997 Sun Microsystems, Inc.
6# Copyright © 1998-1999 Scriptics Corporation.
7#
8# See the file "license.terms" for information on usage and redistribution of
9# this file, and for a DISCLAIMER OF ALL WARRANTIES.
10
11if {"::tcltest" ni [namespace children]} {
12    package require tcltest 2.5
13    namespace import -force ::tcltest::*
14}
15
16
17namespace eval ::tcl::test::encoding {
18    variable x
19
20catch {
21    ::tcltest::loadTestedCommands
22    package require -exact tcl::test [info patchlevel]
23}
24
25proc toutf {args} {
26    variable x
27    lappend x "toutf $args"
28}
29proc fromutf {args} {
30    variable x
31    lappend x "fromutf $args"
32}
33
34proc runtests {} {
35    variable x
36
37# Some tests require the testencoding command
38testConstraint testencoding [llength [info commands testencoding]]
39testConstraint testbytestring [llength [info commands testbytestring]]
40testConstraint teststringbytes [llength [info commands teststringbytes]]
41testConstraint exec [llength [info commands exec]]
42testConstraint testgetencpath [llength [info commands testgetencpath]]
43
44# TclInitEncodingSubsystem is tested by the rest of this file
45# TclFinalizeEncodingSubsystem is not currently tested
46
47test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup {
48    set old [encoding system]
49} -constraints {testencoding} -body {
50    testencoding create foo [namespace origin toutf] [namespace origin fromutf]
51    encoding system foo
52    set x {}
53    encoding convertto abcd
54    return $x
55} -cleanup {
56    encoding system $old
57    testencoding delete foo
58} -result {{fromutf }}
59test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
60    testencoding create foo [namespace origin toutf] [namespace origin fromutf]
61    set x {}
62    encoding convertto foo abcd
63    testencoding delete foo
64    return $x
65} {{fromutf }}
66test encoding-1.3 {Tcl_GetEncoding: load encoding} {
67    list [encoding convertto jis0208 乎] \
68	[encoding convertfrom jis0208 8C]
69} "8C 乎"
70
71test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
72    encoding convertto jis0208 乎
73} {8C}
74test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup {
75    set system [encoding system]
76    set path [encoding dirs]
77} -constraints {testencoding} -body {
78    encoding system shiftjis		;# incr ref count
79    encoding dirs [list [pwd]]
80    set x [encoding convertto shiftjis 乎]	;# old one found
81    encoding system iso8859-1
82    llength shiftjis		;# Shimmer away any cache of Tcl_Encoding
83    lappend x [catch {encoding convertto shiftjis 乎} msg] $msg
84} -cleanup {
85    encoding system iso8859-1
86    encoding dirs $path
87    encoding system $system
88} -result "\x8C\xC1 1 {unknown encoding \"shiftjis\"}"
89
90test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup {
91    set old [encoding system]
92} -body {
93    encoding system shiftjis
94    encoding system
95} -cleanup {
96    encoding system $old
97} -result {shiftjis}
98test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup {
99    set old [fconfigure stdout -encoding]
100} -body {
101    fconfigure stdout -encoding jis0208
102    fconfigure stdout -encoding
103} -cleanup {
104    fconfigure stdout -encoding $old
105} -result {jis0208}
106
107test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup {
108    cd [makeDirectory tmp]
109    makeDirectory [file join tmp encoding]
110    set path [encoding dirs]
111    encoding dirs {}
112    catch {unset encodings}
113    catch {unset x}
114} -body {
115    foreach encoding [encoding names] {
116	set encodings($encoding) 1
117    }
118    makeFile {} [file join tmp encoding junk.enc]
119    makeFile {} [file join tmp encoding junk2.enc]
120    encoding dirs [list [file join [pwd] encoding]]
121    foreach encoding [encoding names] {
122	if {![info exists encodings($encoding)]} {
123	    lappend x $encoding
124	}
125    }
126    lsort $x
127} -cleanup {
128    encoding dirs $path
129    cd [workingDirectory]
130    removeFile [file join tmp encoding junk2.enc]
131    removeFile [file join tmp encoding junk.enc]
132    removeDirectory [file join tmp encoding]
133    removeDirectory tmp
134} -result {junk junk2}
135
136test encoding-5.1 {Tcl_SetSystemEncoding} -setup {
137    set old [encoding system]
138} -body {
139    encoding system jis0208
140    encoding convertto 乎
141} -cleanup {
142    encoding system iso8859-1
143    encoding system $old
144} -result {8C}
145test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} {
146    set old [encoding system]
147    encoding system $old
148    string compare $old [encoding system]
149} {0}
150
151test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
152    testencoding create foo [namespace code {toutf 1}] \
153	[namespace code {fromutf 2}]
154    set x {}
155    encoding convertfrom foo abcd
156    encoding convertto foo abcd
157    testencoding delete foo
158    return $x
159} {{toutf 1} {fromutf 2}}
160test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
161    testencoding create foo [namespace code {toutf a}] \
162	[namespace code {fromutf b}]
163    set x {}
164    encoding convertfrom foo abcd
165    encoding convertto foo abcd
166    testencoding delete foo
167    return $x
168} {{toutf a} {fromutf b}}
169
170test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
171    encoding convertfrom jis0208 8c8c8c8c
172} "吾吾吾吾"
173test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
174    set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C
175    append a $a
176    append a $a
177    append a $a
178    append a $a
179    set x [encoding convertfrom jis0208 $a]
180    list [string length $x] [string index $x 0]
181} "512 乎"
182
183test encoding-8.1 {Tcl_ExternalToUtf} {
184    set f [open [file join [temporaryDirectory] dummy] w]
185    fconfigure $f -translation binary -encoding iso8859-1
186    puts -nonewline $f "ab\x8C\xC1g"
187    close $f
188    set f [open [file join [temporaryDirectory] dummy] r]
189    fconfigure $f -translation binary -encoding shiftjis
190    set x [read $f]
191    close $f
192    file delete [file join [temporaryDirectory] dummy]
193    return $x
194} "ab乎g"
195
196test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
197    encoding convertto jis0208 "吾吾吾吾"
198} {8c8c8c8c}
199test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
200    set a 乎乎乎乎乎乎乎乎
201    append a $a
202    append a $a
203    append a $a
204    append a $a
205    append a $a
206    append a $a
207    set x [encoding convertto jis0208 $a]
208    list [string length $x] [string range $x 0 1]
209} "1024 8C"
210
211test encoding-10.1 {Tcl_UtfToExternal} {
212    set f [open [file join [temporaryDirectory] dummy] w]
213    fconfigure $f -translation binary -encoding shiftjis
214    puts -nonewline $f "ab乎g"
215    close $f
216    set f [open [file join [temporaryDirectory] dummy] r]
217    fconfigure $f -translation binary -encoding iso8859-1
218    set x [read $f]
219    close $f
220    file delete [file join [temporaryDirectory] dummy]
221    return $x
222} "ab\x8C\xC1g"
223
224proc viewable {str} {
225    set res ""
226    foreach c [split $str {}] {
227	if {[string is print $c] && [string is ascii $c]} {
228	    append res $c
229	} else {
230	    append res "\\u[format %4.4X [scan $c %c]]"
231	}
232    }
233    return "$str ($res)"
234}
235
236test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
237    set system [encoding system]
238    set path [encoding dirs]
239    encoding system iso8859-1
240    encoding dirs {}
241    llength jis0208	;# Shimmer any cached Tcl_Encoding in shared literal
242    set x [list [catch {encoding convertto jis0208 乎} msg] $msg]
243    encoding dirs $path
244    encoding system $system
245    lappend x [encoding convertto jis0208 乎]
246} {1 {unknown encoding "jis0208"} 8C}
247test encoding-11.2 {LoadEncodingFile: single-byte} {
248    encoding convertfrom jis0201 \xA1
249} "。"
250test encoding-11.3 {LoadEncodingFile: double-byte} {
251    encoding convertfrom jis0208 8C
252} 乎
253test encoding-11.4 {LoadEncodingFile: multi-byte} {
254    encoding convertfrom shiftjis \x8C\xC1
255} 乎
256test encoding-11.5 {LoadEncodingFile: escape file} {
257    viewable [encoding convertto iso2022 乎]
258} [viewable "\x1B\$B8C\x1B(B"]
259test encoding-11.5.1 {LoadEncodingFile: escape file} {
260    viewable [encoding convertto iso2022-jp 乎]
261} [viewable "\x1B\$B8C\x1B(B"]
262test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup {
263    set system [encoding system]
264    set path [encoding dirs]
265    encoding system iso8859-1
266} -body {
267    cd [temporaryDirectory]
268    encoding dirs [file join tmp encoding]
269    makeDirectory tmp
270    makeDirectory [file join tmp encoding]
271    set f [open [file join tmp encoding splat.enc] w]
272    fconfigure $f -translation binary
273    puts $f "abcdefghijklmnop"
274    close $f
275    encoding convertto splat 乎
276} -returnCodes error -cleanup {
277    file delete [file join [temporaryDirectory] tmp encoding splat.enc]
278    removeDirectory [file join tmp encoding]
279    removeDirectory tmp
280    cd [workingDirectory]
281    encoding dirs $path
282    encoding system $system
283} -result {invalid encoding file "splat"}
284test encoding-11.8 {encoding: extended Unicode UTF-16} {
285    viewable [encoding convertto utf-16le ��]
286} {=Ø9Þ (=\u00D89\u00DE)}
287test encoding-11.9 {encoding: extended Unicode UTF-16} {
288    viewable [encoding convertto utf-16be ��]
289} {Ø=Þ9 (\u00D8=\u00DE9)}
290# OpenEncodingFile is fully tested by the rest of the tests in this file.
291
292test encoding-12.1 {LoadTableEncoding: normal encoding} {
293    set x [encoding convertto iso8859-3 Ġ]
294    append x [encoding convertto iso8859-3 Õ]
295    append x [encoding convertfrom iso8859-3 Õ]
296} "Õ?Ġ"
297test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
298    set x [encoding convertto iso8859-3 abĠg]
299    append x [encoding convertfrom iso8859-3 abÕg]
300} "abÕgabĠg"
301test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
302    set x [encoding convertto shiftjis ab乎g]
303    append x [encoding convertfrom shiftjis ab\x8C\xC1g]
304} "ab\x8C\xC1gab乎g"
305test encoding-12.4 {LoadTableEncoding: double-byte encoding} {
306    set x [encoding convertto jis0208 乎α]
307    append x [encoding convertfrom jis0208 8C&A]
308} "8C&A乎α"
309test encoding-12.5 {LoadTableEncoding: symbol encoding} {
310    set x [encoding convertto symbol γ]
311    append x [encoding convertto symbol g]
312    append x [encoding convertfrom symbol g]
313} "ggγ"
314
315test encoding-13.1 {LoadEscapeTable} {
316    viewable [set x [encoding convertto iso2022 ab乎棙g]]
317} [viewable "ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg"]
318
319test encoding-15.1 {UtfToUtfProc} {
320    encoding convertto utf-8 £
321} "\xC2\xA3"
322test encoding-15.2 {UtfToUtfProc null character output} testbytestring {
323    binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z
324    set z
325} 00
326test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
327    set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]]
328    binary scan [teststringbytes $y] H* z
329    set z
330} c080
331test encoding-15.4 {UtfToUtfProc emoji character input} -body {
332    set x \xED\xA0\xBD\xED\xB8\x82
333    set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]
334    list [string length $x] $y
335} -result "6 ��"
336test encoding-15.5 {UtfToUtfProc emoji character input} {
337    set x \xF0\x9F\x98\x82
338    set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
339    list [string length $x] $y
340} "4 ��"
341test encoding-15.6 {UtfToUtfProc emoji character output} {
342    set x \uDE02\uD83D\uDE02\uD83D
343    set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
344    binary scan $y H* z
345    list [string length $y] $z
346} {10 efbfbdf09f9882efbfbd}
347test encoding-15.7 {UtfToUtfProc emoji character output} {
348    set x \uDE02\uD83D\uD83D
349    set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
350    binary scan $y H* z
351    list [string length $x] [string length $y] $z
352} {3 9 efbfbdefbfbdefbfbd}
353test encoding-15.8 {UtfToUtfProc emoji character output} {
354    set x \uDE02\uD83Dé
355    set y [encoding convertto utf-8 \uDE02\uD83Dé]
356    binary scan $y H* z
357    list [string length $x] [string length $y] $z
358} {3 8 efbfbdefbfbdc3a9}
359test encoding-15.9 {UtfToUtfProc emoji character output} {
360    set x \uDE02\uD83DX
361    set y [encoding convertto utf-8 \uDE02\uD83DX]
362    binary scan $y H* z
363    list [string length $x] [string length $y] $z
364} {3 7 efbfbdefbfbd58}
365test encoding-15.10 {UtfToUtfProc high surrogate character output} {
366    set x \uDE02é
367    set y [encoding convertto utf-8 \uDE02é]
368    binary scan $y H* z
369    list [string length $x] [string length $y] $z
370} {2 5 efbfbdc3a9}
371test encoding-15.11 {UtfToUtfProc low surrogate character output} {
372    set x \uDA02é
373    set y [encoding convertto utf-8 \uDA02é]
374    binary scan $y H* z
375    list [string length $x] [string length $y] $z
376} {2 5 efbfbdc3a9}
377test encoding-15.12 {UtfToUtfProc high surrogate character output} {
378    set x \uDE02Y
379    set y [encoding convertto utf-8 \uDE02Y]
380    binary scan $y H* z
381    list [string length $x] [string length $y] $z
382} {2 4 efbfbd59}
383test encoding-15.13 {UtfToUtfProc low surrogate character output} {
384    set x \uDA02Y
385    set y [encoding convertto utf-8 \uDA02Y]
386    binary scan $y H* z
387    list [string length $x] [string length $y] $z
388} {2 4 efbfbd59}
389test encoding-15.14 {UtfToUtfProc high surrogate character output} {
390    set x \uDE02
391    set y [encoding convertto utf-8 \uDE02]
392    binary scan $y H* z
393    list [string length $x] [string length $y] $z
394} {1 3 efbfbd}
395test encoding-15.15 {UtfToUtfProc low surrogate character output} {
396    set x \uDA02
397    set y [encoding convertto utf-8 \uDA02]
398    binary scan $y H* z
399    list [string length $x] [string length $y] $z
400} {1 3 efbfbd}
401test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
402    set x \xF0\xA0\xA1\xC2
403    set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
404    list [string length $x] $y
405} "4 \xF0\xA0\xA1\xC2"
406test encoding-15.17 {UtfToUtfProc emoji character output} {
407    set x ��
408    set y [encoding convertto utf-8 ��]
409    binary scan $y H* z
410    list [string length $y] $z
411} {4 f09f9882}
412test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} {
413    set y [encoding convertto cesu-8 \U10000]
414    binary scan $y H* z
415    list [string length $y] $z
416} {6 eda080edb080}
417test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} {
418    set y [encoding convertto cesu-8 \uD800]
419    binary scan $y H* z
420    list [string length $y] $z
421} {3 eda080}
422test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} {
423    set y [encoding convertto cesu-8 \uDC00]
424    binary scan $y H* z
425    list [string length $y] $z
426} {3 edb080}
427test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} {
428    set y [encoding convertto cesu-8 \uFFFF]
429    binary scan $y H* z
430    list [string length $y] $z
431} {3 efbfbf}
432
433test encoding-16.1 {Utf16ToUtfProc} -body {
434    set val [encoding convertfrom utf-16 NN]
435    list $val [format %x [scan $val %c]]
436} -result "乎 4e4e"
437test encoding-16.2 {Utf16ToUtfProc} -body {
438    set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"]
439    list $val [format %x [scan $val %c]]
440} -result "\U460DC 460dc"
441test encoding-16.3 {Utf16ToUtfProc} -body {
442    set val [encoding convertfrom utf-16 "\xDC\xDC"]
443    list $val [format %x [scan $val %c]]
444} -result "\uDCDC dcdc"
445test encoding-16.4 {Ucs2ToUtfProc} -body {
446    set val [encoding convertfrom ucs-2 NN]
447    list $val [format %x [scan $val %c]]
448} -result "乎 4e4e"
449test encoding-16.4 {Ucs2ToUtfProc} -body {
450    set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"]
451    list $val [format %x [scan $val %c]]
452} -result "\U460DC 460dc"
453
454test encoding-17.1 {UtfToUtf16Proc} -body {
455    encoding convertto utf-16 "\U460DC"
456} -result "\xD8\xD8\xDC\xDC"
457test encoding-17.2 {UtfToUcs2Proc} -body {
458    encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
459} -result "\uFFFD"
460test encoding-17.3 {UtfToUtf16Proc} -body {
461    encoding convertto utf-16be "\uDCDC"
462} -result "\xFF\xFD"
463test encoding-17.4 {UtfToUtf16Proc} -body {
464    encoding convertto utf-16le "\uD8D8"
465} -result "\xFD\xFF"
466
467test encoding-18.1 {TableToUtfProc} {
468} {}
469
470test encoding-19.1 {TableFromUtfProc} {
471} {}
472
473test encoding-20.1 {TableFreefProc} {
474} {}
475
476test encoding-21.1 {EscapeToUtfProc} {
477} {}
478
479test encoding-22.1 {EscapeFromUtfProc} {
480} {}
481
482set iso2022encData "\x1B\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\x1B(B
483\x1B\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\x1B(B
484\x1B\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\x1B(B
485casino_japanese@___.com \x1B\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\x1B(B
486\x1B\$B\$7\$g\$&\$+!)\x1B(B"
487
488set iso2022uniData [encoding convertfrom iso2022-jp $iso2022encData]
489set iso2022uniData2 "私どもでは、チップご購入時にご登録いただいたご住所をキャッシュアウトの際の
490小切手送付先として使用しております。恐れ入りますが、正しい住所をご登録しなお
491お願いいたします。また、大変恐縮ですが、住所変更のあと、日本語サービス部(
492casino_japanese@___.com )までご住所変更済の連絡をいただけないで
493しょうか?"
494
495cd [temporaryDirectory]
496set fid [open iso2022.txt w]
497fconfigure $fid -encoding binary
498puts -nonewline $fid $iso2022encData
499close $fid
500
501test encoding-23.1 {iso2022-jp escape encoding test} {
502    string equal $iso2022uniData $iso2022uniData2
503} 1
504test encoding-23.2 {iso2022-jp escape encoding test} {
505    # This checks that 'gets' isn't resetting the encoding inappropriately.
506    # [Bug #523988]
507    set fid [open iso2022.txt r]
508    fconfigure $fid -encoding iso2022-jp
509    set out ""
510    set count 0
511    while {[set num [gets $fid line]] >= 0} {
512	if {$count} {
513	    incr count 1 ; # account for newline
514	    append out \n
515	}
516	append out $line
517	incr count $num
518    }
519    close $fid
520    if {[string compare $iso2022uniData $out]} {
521	return -code error "iso2022-jp read in doesn't match original"
522    }
523    list $count $out
524} [list [string length $iso2022uniData] $iso2022uniData]
525test encoding-23.3 {iso2022-jp escape encoding test} {
526    # read $fis <size> reads size in chars, not raw bytes.
527    set fid [open iso2022.txt r]
528    fconfigure $fid -encoding iso2022-jp
529    set data [read $fid 50]
530    close $fid
531    return $data
532} [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
533cd [workingDirectory]
534
535# Code to make the next few tests more intelligible; the code being tested
536# should be in the body of the test!
537proc runInSubprocess {contents {filename iso2022.tcl}} {
538    set theFile [makeFile $contents $filename]
539    try {
540	exec [interpreter] $theFile
541    } finally {
542	removeFile $theFile
543    }
544}
545
546test encoding-24.1 {EscapeFreeProc on open channels} exec {
547    runInSubprocess {
548	set f [open [file join [file dirname [info script]] iso2022.txt]]
549	fconfigure $f -encoding iso2022-jp
550	gets $f
551    }
552} {}
553test encoding-24.2 {EscapeFreeProc on open channels} {exec} {
554    # Bug #524674 output
555    viewable [runInSubprocess {
556	encoding system cp1252;	# Bug #2891556 crash revelator
557	fconfigure stdout -encoding iso2022-jp
558	puts ab乎棙g
559	set env(TCL_FINALIZE_ON_EXIT) 1
560	exit
561    }]
562} "ab\x1B\$B8C\x1B\$(DD%\x1B(Bg (ab\\u001B\$B8C\\u001B\$(DD%\\u001B(Bg)"
563test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
564    # Bug #219314 - if we don't free escape encodings correctly on channel
565    # closure, we go boom
566    set file [makeFile {
567	encoding system iso2022-jp
568	set a "乎乞也"; # 3 Japanese Kanji letters
569	puts $a
570    } iso2022.tcl]
571    set f [open "|[list [interpreter] $file]"]
572    fconfigure $f -encoding iso2022-jp
573    set count [gets $f line]
574    close $f
575    removeFile iso2022.tcl
576    list $count [viewable $line]
577} [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"]
578
579test encoding-24.4 {Parse valid or invalid utf-8} {
580    string length [encoding convertfrom utf-8 "\xC0\x80"]
581} 1
582test encoding-24.5 {Parse valid or invalid utf-8} {
583    string length [encoding convertfrom utf-8 "\xC0\x81"]
584} 2
585test encoding-24.6 {Parse valid or invalid utf-8} {
586    string length [encoding convertfrom utf-8 "\xC1\xBF"]
587} 2
588test encoding-24.7 {Parse valid or invalid utf-8} {
589    string length [encoding convertfrom utf-8 "\xC2\x80"]
590} 1
591test encoding-24.8 {Parse valid or invalid utf-8} {
592    string length [encoding convertfrom utf-8 "\xE0\x80\x80"]
593} 3
594test encoding-24.9 {Parse valid or invalid utf-8} {
595    string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"]
596} 3
597test encoding-24.10 {Parse valid or invalid utf-8} {
598    string length [encoding convertfrom utf-8 "\xE0\xA0\x80"]
599} 1
600test encoding-24.11 {Parse valid or invalid utf-8} {
601    string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"]
602} 1
603
604file delete [file join [temporaryDirectory] iso2022.txt]
605
606#
607# Begin jajp encoding round-trip conformity tests
608#
609proc foreach-jisx0208 {varName command} {
610    upvar 1 $varName code
611    foreach range {
612	{2121 217E}
613	{2221 222E}
614	{223A 2241}
615	{224A 2250}
616	{225C 226A}
617	{2272 2279}
618	{227E 227E}
619	{2330 2339}
620	{2421 2473}
621	{2521 2576}
622	{2821 2821}
623	{282C 282C}
624	{2837 2837}
625
626	{30 21 4E 7E}
627	{4F21 4F53}
628
629	{50 21 73 7E}
630	{7421 7426}
631    } {
632	if {[llength $range] == 2} {
633	    # for adhoc range. simple {first last}. inclusive.
634	    scan $range %x%x first last
635	    for {set i $first} {$i <= $last} {incr i} {
636		set code $i
637		uplevel 1 $command
638	    }
639	} elseif {[llength $range] == 4} {
640	    # for uniform range.
641	    scan $range %x%x%x%x h0 l0 hend lend
642	    for {set hi $h0} {$hi <= $hend} {incr hi} {
643		for {set lo $l0} {$lo <= $lend} {incr lo} {
644		    set code [expr {$hi << 8 | ($lo & 0xff)}]
645		    uplevel 1 $command
646		}
647	    }
648	} else {
649	    error "really?"
650	}
651    }
652}
653proc gen-jisx0208-euc-jp {code} {
654    binary format cc \
655	[expr {($code >> 8) | 0x80}] [expr {($code & 0xFF) | 0x80}]
656}
657proc gen-jisx0208-iso2022-jp {code} {
658    binary format a3cca3 \
659	"\x1B\$B" [expr {$code >> 8}] [expr {$code & 0xFF}] "\x1B(B"
660}
661proc gen-jisx0208-cp932 {code} {
662    set c1 [expr {($code >> 8) | 0x80}]
663    set c2 [expr {($code & 0xff)| 0x80}]
664    if {$c1 % 2} {
665	set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x31 : 0x71)}]
666	incr c2 [expr {- (0x60 + ($c2 < 0xE0))}]
667    } else {
668	set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x30 : 0x70)}]
669	incr c2 -2
670    }
671    binary format cc $c1 $c2
672}
673proc channel-diff {fa fb} {
674    set diff {}
675    while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} {
676	if {[string compare $la $lb] == 0} continue
677	# lappend diff $la $lb
678
679	# For more readable (easy to analyze) output.
680	set code [lindex $la 0]
681	binary scan [lindex $la 1] H* expected
682	binary scan [lindex $lb 1] H* got
683	lappend diff [list $code $expected $got]
684    }
685    return $diff
686}
687
688# Create char tables.
689cd [temporaryDirectory]
690foreach enc {cp932 euc-jp iso2022-jp} {
691    set f [open $enc.chars w]
692    fconfigure $f -encoding binary
693    foreach-jisx0208 code {
694	puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]]
695    }
696    close $f
697}
698# shiftjis == cp932 for jisx0208.
699file copy -force cp932.chars shiftjis.chars
700
701set NUM 0
702foreach from {cp932 shiftjis euc-jp iso2022-jp} {
703    foreach to {cp932 shiftjis euc-jp iso2022-jp} {
704	test encoding-25.[incr NUM] "jisx0208 $from => $to" -setup {
705	    cd [temporaryDirectory]
706	} -body {
707	    set f [open $from.chars]
708	    fconfigure $f -encoding $from
709	    set out [open $from.$to.tcltestout w]
710	    fconfigure $out -encoding $to
711	    puts -nonewline $out [read $f]
712	    close $out
713	    close $f
714	    # then compare $to.chars <=> $from.to.tcltestout as binary.
715	    set fa [open $to.chars rb]
716	    set fb [open $from.$to.tcltestout rb]
717	    channel-diff $fa $fb
718	    # Difference should be empty.
719	} -cleanup {
720	    close $fa
721	    close $fb
722	} -result {}
723    }
724}
725
726test encoding-26.0 {Tcl_GetEncodingSearchPath} -constraints {
727    testgetencpath
728} -setup {
729    set origPath [testgetencpath]
730    testsetencpath slappy
731} -body {
732    testgetencpath
733} -cleanup {
734    testsetencpath $origPath
735} -result slappy
736
737file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout]
738# ===> Cut here <===
739
740# EscapeFreeProc, GetTableEncoding, unilen are fully tested by the rest of
741# this file.
742
743
744test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body {
745    encoding dirs ? ?
746} -result {wrong # args: should be "encoding dirs ?dirList?"}
747test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body {
748    encoding dirs "\{not a list"
749} -result "expected directory list but got \"\{not a list\""
750
751}
752
753
754test encoding-28.0 {all encodings load} -body {
755	set string hello
756	foreach name [encoding names] {
757		incr count
758		encoding convertto $name $string
759
760		# discard the cached internal representation of Tcl_Encoding
761		# Unfortunately, without this, encoding 2-1 fails.
762		llength $name
763	}
764	return $count
765} -result [expr {[info exists ::tcl_precision] ? 87 : 86}]
766
767runtests
768
769}
770
771# cleanup
772namespace delete ::tcl::test::encoding
773::tcltest::cleanupTests
774return
775
776# Local Variables:
777# mode: tcl
778# End:
779