xref: /freebsd/contrib/one-true-awk/testdir/T.utf (revision f32a6403)
#!/bin/sh

# T.utf: regression tests for UTF-8 handling in awk string functions.
#
# A driver awk program reads the test table in the here-document that
# follows.  A "try" line supplies the program under test; each line after
# it, up to the next blank line, supplies tab-separated input fields
# followed by the expected output in the last field.  For every such line
# the driver pipes the input through the program under test into foo1,
# writes the expected text into foo2, and prints "test N.M failed" when
# diff reports a difference.
#
# The awk binary is taken from $awk and defaults to ../a.out.

echo T.utf: tests of utf functions

awk=${awk-../a.out}

# Pass the selected binary to the driver with -v so that the per-test
# commands run the same awk as the driver, instead of a fixed ../a.out.
$awk -v awk="$awk" '
BEGIN {
	FS = "\t"
}
NF == 0 || $1 ~ /^#/ {	# skip blank lines and comment lines
	next
}
$1 ~ /try/ {	# new test
	nt++
	sub(/try [a-zA-Z_0-9]+ /, "")	# drop the keyword and the test name
	prog = $0
	printf("try %3d %s\n", nt, prog)
	# command line that runs the program under test with tab as FS
	prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
	# print "prog is", prog
	nt2 = 0
	while (getline > 0) {
		if (NF == 0)	# blank line terminates a sequence
			break
		# fields 1 .. NF-1 are the input data; $NF is the expected output
		input = $1
		for (i = 2; i < NF; i++)
			input = input "\t" $i
		test = sprintf("./echo '"'"'%s'"'"' | %s >foo1; ",
			input, prog)
		if ($NF == "\"\"")	# "" in the table means empty expected output
			output = ">foo2;"
		else
			output = sprintf("./echo '"'"'%s'"'"' >foo2; ", $NF)
		# turn the \t and \n escapes written in the table into real characters
		gsub(/\\t/, "\t", output)
		gsub(/\\n/, "\n", output)
		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
			nt, ++nt2)
		# print  "input is", input
		# print  "test is", test
		# print  "output is", output
		# print  "run is", run
		system(test output run)
	}
	tt += nt2	# running total of individual cases
}
END { print tt, "tests" }
' <<\!!!!
49*f32a6403SWarner Losh# General format:
50*f32a6403SWarner Losh# try program as rest of line
# $1	$2	$3	output1  (\t for tab, \n for newline)
52*f32a6403SWarner Losh# $1	$2	$3	output2  ("" for null)
53*f32a6403SWarner Losh# ... terminated by blank line
54*f32a6403SWarner Losh
55*f32a6403SWarner Losh# try another program...
56*f32a6403SWarner Losh
57*f32a6403SWarner Loshtry length { print length($1) }
58*f32a6403SWarner Losh	0
59*f32a6403SWarner Losha	1
60*f32a6403SWarner Loshの今がその時だ	7
61*f32a6403SWarner LoshСейчас	6
62*f32a6403SWarner Losh现在是时候了	6
63*f32a6403SWarner Losh给所有的好男	6
64*f32a6403SWarner Losh来参加聚会。	6
65*f32a6403SWarner Losh��	1
66*f32a6403SWarner Losh�� finger	8
67*f32a6403SWarner LoshΤωρα	4
68*f32a6403SWarner Loshγια	3
69*f32a6403SWarner Loshνα	2
70*f32a6403SWarner Loshעכשיו	5
71*f32a6403SWarner Loshלכל	3
72*f32a6403SWarner Loshלבוא	4
73*f32a6403SWarner Loshの今がその時だ	7
74*f32a6403SWarner Losh지금이	3
75*f32a6403SWarner Losh모든	2
76*f32a6403SWarner Losh파티에	3
77*f32a6403SWarner LoshСейчас	6
78*f32a6403SWarner Loshдля	3
79*f32a6403SWarner Loshприйти	6
80*f32a6403SWarner Losh
81*f32a6403SWarner Loshtry index { print index($1, $2) }
82*f32a6403SWarner Loshabc	a	1
83*f32a6403SWarner Loshabc	b	2
84*f32a6403SWarner Loshabc	x	0
85*f32a6403SWarner Losh现在是时候了	""	0
86*f32a6403SWarner Losh现在是时候了	了	6
87*f32a6403SWarner Losh现在是时候了	在是	2
88*f32a6403SWarner Losh现在是时候了	x	0
89*f32a6403SWarner Loshx在是时候了	x	2
90*f32a6403SWarner Losh�� fingerすべての善人のためにすべての善人のために	f	3
91*f32a6403SWarner Losh�� finger��	r��	8
92*f32a6403SWarner Losh
93*f32a6403SWarner Loshtry substr { print substr($0, 2, 3) }
94*f32a6403SWarner Loshabcdef	bcd
95*f32a6403SWarner LoshΤωρα ειναι η	ωρα
96*f32a6403SWarner LoshΤω	ω
97*f32a6403SWarner Losh지금 이절호의	금 이
98*f32a6403SWarner Loshxпyрийти	пyр
99*f32a6403SWarner Losh
100*f32a6403SWarner Loshtry rematch { print $1 ~ $2 }
101*f32a6403SWarner Loshabc	a	1
102*f32a6403SWarner Loshabc	x	0
103*f32a6403SWarner Loshすべての善人のために	の	1
104*f32a6403SWarner Loshすべての善人のために	の.*の	1
105*f32a6403SWarner Loshすべての善人のために	の.*て	0
106*f32a6403SWarner LoshΤωρα	ω+	1
107*f32a6403SWarner Losh
108*f32a6403SWarner Losh# replace first occurrence of $2 by $3 in $1
109*f32a6403SWarner Loshtry sub { n = sub($2, $3, $1); print n, $1 }
110*f32a6403SWarner Loshabcdef	bc	XYZ	1 aXYZdef
111*f32a6403SWarner Loshabcdef	xy	XYZ	0 abcdef
112*f32a6403SWarner Loshの今がその時だ	の	NO	1 NO今がその時だ
113*f32a6403SWarner Losh�� finger	��.*g	FING	1 FINGer
114*f32a6403SWarner LoshСейчас	.	x	1 xейчас
115*f32a6403SWarner Losh
116*f32a6403SWarner Losh# replace all occurrences of $2 by $3 in $1
117*f32a6403SWarner Loshtry gsub { n = gsub($2, $3, $1); print n, $1 }
118*f32a6403SWarner Loshabcdef	bc	XYZ	1 aXYZdef
119*f32a6403SWarner Loshabcdef	xy	XYZ	0 abcdef
120*f32a6403SWarner Loshの今がその時だ	の	NO	2 NO今がそNO時だ
121*f32a6403SWarner Losh�� finger	��.*g	FING	1 FINGer
122*f32a6403SWarner LoshСейчас	.	x	6 xxxxxx
123*f32a6403SWarner Losh
124*f32a6403SWarner Loshtry match { print match($1, $2), RSTART, RLENGTH }
125*f32a6403SWarner Loshabc	[^a]	2 2 1
126*f32a6403SWarner Loshabc	[^ab]	3 3 1
127*f32a6403SWarner Loshすべての善人のために	[^す]	2 2 1
128*f32a6403SWarner Loshすべての善人のために	[^ぁ-ゖ]	5 5 1
129*f32a6403SWarner Loshabc	a	1 1 1
130*f32a6403SWarner Loshabc	x	0 0 -1
131*f32a6403SWarner Loshすべての善人のために	の	4 4 1
132*f32a6403SWarner Loshすべての善人のために	の.*の	4 4 4
133*f32a6403SWarner Loshすべての善人のために	の.*て	0 0 -1
134*f32a6403SWarner LoshΤωρα	ω+	2 2 1
135*f32a6403SWarner LoshΤωρα	x+	0 0 -1
136*f32a6403SWarner LoshΤωρα	ω.	2 2 2
137*f32a6403SWarner Loshすべての善人のために	[の]	4 4 1
138*f32a6403SWarner Loshすべての善人のために	[ぁ-え]	0 0 -1
139*f32a6403SWarner Loshすべての善人のために	[^ぁ-え]	1 1 1
140*f32a6403SWarner LoshΤωρα ειναι η	[α-ω]	2 2 1
141*f32a6403SWarner LoshΤωρα ειναι η	[α-ω]+	2 2 3
142*f32a6403SWarner LoshxxxΤωρα ειναι η	[Α-Ω]	4 4 1
143*f32a6403SWarner Loshγια όλους τους καλούς ά	α.*α	3 3 15
144*f32a6403SWarner Loshνα έρθει στο πά	[^ν]	2 2 1
145*f32a6403SWarner Losh
146*f32a6403SWarner Losh# FS="" should split into unicode chars
147*f32a6403SWarner Loshtry emptyFS BEGIN {FS=""} {print NF}
148*f32a6403SWarner Loshすべての善人のために	10
149*f32a6403SWarner Loshの今がその時だ	7
150*f32a6403SWarner LoshСейчас	6
151*f32a6403SWarner Losh现在是时候了	6
152*f32a6403SWarner Losh给所有的好男	6
153*f32a6403SWarner Losh来参加聚会。	6
154*f32a6403SWarner Losh��	1
155*f32a6403SWarner Losh�� finger	8
156*f32a6403SWarner Losh
157*f32a6403SWarner Losh# printf(%N.Ns) for utf8 strings
158*f32a6403SWarner Loshtry printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
159*f32a6403SWarner Loshabcd	[   ab][ab   ]
160*f32a6403SWarner Losh现在abc	[   现在][现在   ]
161*f32a6403SWarner Losh现ωabc	[   现ω][现ω   ]
162*f32a6403SWarner Loshωabc	[   ωa][ωa   ]
163*f32a6403SWarner LoshСейчас	[   Се][Се   ]
164*f32a6403SWarner LoshСейxyz	[   Се][Се   ]
165*f32a6403SWarner Losh��	[    ��][��    ]
166*f32a6403SWarner Losh
# printf(%Ns) for utf8 strings (field width only)
168*f32a6403SWarner Loshtry printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
169*f32a6403SWarner Loshabcd	[ abcd][abcd ]
170*f32a6403SWarner Losh现在ab	[ 现在ab][现在ab ]
171*f32a6403SWarner Losha现在ab	[a现在ab][a现在ab]
172*f32a6403SWarner Losha现在abc	[a现在abc][a现在abc]
173*f32a6403SWarner Losh现ωab	[ 现ωab][现ωab ]
174*f32a6403SWarner Loshωabc	[ ωabc][ωabc ]
175*f32a6403SWarner LoshСейчас	[Сейчас][Сейчас]
176*f32a6403SWarner Losh��	[    ��][��    ]
177*f32a6403SWarner Losh
# printf(%.Ns) for utf8 strings (precision only)
179*f32a6403SWarner Loshtry printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
180*f32a6403SWarner Loshabcd	[ab][ab]
181*f32a6403SWarner Losh现在abc	[现在][现在]
182*f32a6403SWarner Losh现ωabc	[现ω][现ω]
183*f32a6403SWarner Loshω	[ω][ω]
184*f32a6403SWarner Losh��	[��][��]
185*f32a6403SWarner Losh
186*f32a6403SWarner Losh# printf(%c) for utf
187*f32a6403SWarner Loshtry printfc {printf("%c %c\n", $1, substr($1,2,1))}
188*f32a6403SWarner Loshすべての善人のために	す べ
189*f32a6403SWarner Loshの今がその時だ	の 今
190*f32a6403SWarner LoshСейчас	С е
191*f32a6403SWarner Losh现在是时候了	现 在
192*f32a6403SWarner Losh����	�� ��
193*f32a6403SWarner Losh
194*f32a6403SWarner Losh!!!!
195