#!/bin/sh

echo T.utf: tests of utf functions

# The awk binary under test.  Defaults to ../a.out; set $awk in the
# environment to test a different implementation.
awk=${awk-../a.out}

# Driver program (itself run by the awk under test).  It reads the test table
# supplied on standard input by the here-document that follows this program:
#
#   - blank lines and lines whose first field starts with "#" are skipped;
#   - a "try NAME PROGRAM" line starts a new test whose awk program is the
#     rest of the line;
#   - every following line, up to the next blank line, is one case: the
#     tab-separated fields $1..$(NF-1) are the input and $NF is the expected
#     output.
#
# For each case the driver runs the program on the input, writes the actual
# output to foo1 and the expected output to foo2, and reports a failure when
# diff finds a difference.
#
# The interpreter used for the individual cases is passed in with -v so that
# it always matches the one selected by $awk above, instead of being
# hard-coded to ../a.out.  (Values given to -v undergo awk escape processing,
# so the path should not contain backslashes.)
#
# NOTE: the program text is inside shell single quotes, so comments in it
# must not contain an apostrophe.
$awk -v awk="$awk" '
BEGIN {
	FS = "\t"
}
NF == 0 || $1 ~ /^#/ {
	next
}
$1 ~ /try/ {	# new test
	nt++
	# Strip the leading "try NAME " so that only the program text remains.
	sub(/try [a-zA-Z_0-9]+ /, "")
	prog = $0
	printf("try %3d %s\n", nt, prog)
	# Shell command that runs the program with tab as the field separator.
	prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
	# print "prog is", prog
	nt2 = 0
	while (getline > 0) {
		if (NF == 0)	# blank line terminates a sequence
			break
		# Rebuild the input record from every field except the last.
		input = $1
		for (i = 2; i < NF; i++)	# input data
			input = input "\t" $i
		test = sprintf("./echo '"'"'%s'"'"' | %s >foo1; ",
			input, prog)
		# An expected value written as "" means empty output: just
		# create an empty foo2.
		if ($NF == "\"\"")
			output = ">foo2;"
		else
			output = sprintf("./echo '"'"'%s'"'"' >foo2; ", $NF)
		# Expand the two-character sequences \t and \n in the expected
		# output into a real tab and a real newline.
		gsub(/\\t/, "\t", output)
		gsub(/\\n/, "\n", output)
		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
			nt, ++nt2)
		# print "input is", input
		# print "test is", test
		# print "output is", output
		# print "run is", run
		system(test output run)
	}
	tt += nt2	# running total of cases, across all tests
}
LoshEND { print tt, "tests" } 48*f32a6403SWarner Losh' <<\!!!! 49*f32a6403SWarner Losh# General format: 50*f32a6403SWarner Losh# try program as rest of line 51*f32a6403SWarner Losh# $1 $2 $3 output1 (\t for tab, \n for newline, 52*f32a6403SWarner Losh# $1 $2 $3 output2 ("" for null) 53*f32a6403SWarner Losh# ... terminated by blank line 54*f32a6403SWarner Losh 55*f32a6403SWarner Losh# try another program... 56*f32a6403SWarner Losh 57*f32a6403SWarner Loshtry length { print length($1) } 58*f32a6403SWarner Losh 0 59*f32a6403SWarner Losha 1 60*f32a6403SWarner Loshの今がその時だ 7 61*f32a6403SWarner LoshСейчас 6 62*f32a6403SWarner Losh现在是时候了 6 63*f32a6403SWarner Losh给所有的好男 6 64*f32a6403SWarner Losh来参加聚会。 6 65*f32a6403SWarner Losh 1 66*f32a6403SWarner Losh finger 8 67*f32a6403SWarner LoshΤωρα 4 68*f32a6403SWarner Loshγια 3 69*f32a6403SWarner Loshνα 2 70*f32a6403SWarner Loshעכשיו 5 71*f32a6403SWarner Loshלכל 3 72*f32a6403SWarner Loshלבוא 4 73*f32a6403SWarner Loshの今がその時だ 7 74*f32a6403SWarner Losh지금이 3 75*f32a6403SWarner Losh모든 2 76*f32a6403SWarner Losh파티에 3 77*f32a6403SWarner LoshСейчас 6 78*f32a6403SWarner Loshдля 3 79*f32a6403SWarner Loshприйти 6 80*f32a6403SWarner Losh 81*f32a6403SWarner Loshtry index { print index($1, $2) } 82*f32a6403SWarner Loshabc a 1 83*f32a6403SWarner Loshabc b 2 84*f32a6403SWarner Loshabc x 0 85*f32a6403SWarner Losh现在是时候了 "" 0 86*f32a6403SWarner Losh现在是时候了 了 6 87*f32a6403SWarner Losh现在是时候了 在是 2 88*f32a6403SWarner Losh现在是时候了 x 0 89*f32a6403SWarner Losh现x在是时候了 x 2 90*f32a6403SWarner Losh fingerすべての善人のためにすべての善人のために f 3 91*f32a6403SWarner Losh finger r 8 92*f32a6403SWarner Losh 93*f32a6403SWarner Loshtry substr { print substr($0, 2, 3) } 94*f32a6403SWarner Loshabcdef bcd 95*f32a6403SWarner LoshΤωρα ειναι η ωρα 96*f32a6403SWarner LoshΤω ω 97*f32a6403SWarner Losh지금 이절호의 금 이 98*f32a6403SWarner Loshxпyрийти пyр 99*f32a6403SWarner Losh 100*f32a6403SWarner Loshtry rematch { print $1 ~ $2 } 101*f32a6403SWarner Loshabc a 1 102*f32a6403SWarner Loshabc x 0 
103*f32a6403SWarner Loshすべての善人のために の 1 104*f32a6403SWarner Loshすべての善人のために の.*の 1 105*f32a6403SWarner Loshすべての善人のために の.*て 0 106*f32a6403SWarner LoshΤωρα ω+ 1 107*f32a6403SWarner Losh 108*f32a6403SWarner Losh# replace first occurrence of $2 by $3 in $1 109*f32a6403SWarner Loshtry sub { n = sub($2, $3, $1); print n, $1 } 110*f32a6403SWarner Loshabcdef bc XYZ 1 aXYZdef 111*f32a6403SWarner Loshabcdef xy XYZ 0 abcdef 112*f32a6403SWarner Loshの今がその時だ の NO 1 NO今がその時だ 113*f32a6403SWarner Losh finger .*g FING 1 FINGer 114*f32a6403SWarner LoshСейчас . x 1 xейчас 115*f32a6403SWarner Losh 116*f32a6403SWarner Losh# replace all occurrences of $2 by $3 in $1 117*f32a6403SWarner Loshtry gsub { n = gsub($2, $3, $1); print n, $1 } 118*f32a6403SWarner Loshabcdef bc XYZ 1 aXYZdef 119*f32a6403SWarner Loshabcdef xy XYZ 0 abcdef 120*f32a6403SWarner Loshの今がその時だ の NO 2 NO今がそNO時だ 121*f32a6403SWarner Losh finger .*g FING 1 FINGer 122*f32a6403SWarner LoshСейчас . x 6 xxxxxx 123*f32a6403SWarner Losh 124*f32a6403SWarner Loshtry match { print match($1, $2), RSTART, RLENGTH } 125*f32a6403SWarner Loshabc [^a] 2 2 1 126*f32a6403SWarner Loshabc [^ab] 3 3 1 127*f32a6403SWarner Loshすべての善人のために [^す] 2 2 1 128*f32a6403SWarner Loshすべての善人のために [^ぁ-ゖ] 5 5 1 129*f32a6403SWarner Loshabc a 1 1 1 130*f32a6403SWarner Loshabc x 0 0 -1 131*f32a6403SWarner Loshすべての善人のために の 4 4 1 132*f32a6403SWarner Loshすべての善人のために の.*の 4 4 4 133*f32a6403SWarner Loshすべての善人のために の.*て 0 0 -1 134*f32a6403SWarner LoshΤωρα ω+ 2 2 1 135*f32a6403SWarner LoshΤωρα x+ 0 0 -1 136*f32a6403SWarner LoshΤωρα ω. 
2 2 2 137*f32a6403SWarner Loshすべての善人のために [の] 4 4 1 138*f32a6403SWarner Loshすべての善人のために [ぁ-え] 0 0 -1 139*f32a6403SWarner Loshすべての善人のために [^ぁ-え] 1 1 1 140*f32a6403SWarner LoshΤωρα ειναι η [α-ω] 2 2 1 141*f32a6403SWarner LoshΤωρα ειναι η [α-ω]+ 2 2 3 142*f32a6403SWarner LoshxxxΤωρα ειναι η [Α-Ω] 4 4 1 143*f32a6403SWarner Loshγια όλους τους καλούς ά α.*α 3 3 15 144*f32a6403SWarner Loshνα έρθει στο πά [^ν] 2 2 1 145*f32a6403SWarner Losh 146*f32a6403SWarner Losh# FS="" should split into unicode chars 147*f32a6403SWarner Loshtry emptyFS BEGIN {FS=""} {print NF} 148*f32a6403SWarner Loshすべての善人のために 10 149*f32a6403SWarner Loshの今がその時だ 7 150*f32a6403SWarner LoshСейчас 6 151*f32a6403SWarner Losh现在是时候了 6 152*f32a6403SWarner Losh给所有的好男 6 153*f32a6403SWarner Losh来参加聚会。 6 154*f32a6403SWarner Losh 1 155*f32a6403SWarner Losh finger 8 156*f32a6403SWarner Losh 157*f32a6403SWarner Losh# printf(%N.Ns) for utf8 strings 158*f32a6403SWarner Loshtry printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1} 159*f32a6403SWarner Loshabcd [ ab][ab ] 160*f32a6403SWarner Losh现在abc [ 现在][现在 ] 161*f32a6403SWarner Losh现ωabc [ 现ω][现ω ] 162*f32a6403SWarner Loshωabc [ ωa][ωa ] 163*f32a6403SWarner LoshСейчас [ Се][Се ] 164*f32a6403SWarner LoshСейxyz [ Се][Се ] 165*f32a6403SWarner Losh [ ][ ] 166*f32a6403SWarner Losh 167*f32a6403SWarner Losh# printf(%N.Ns) for utf8 strings 168*f32a6403SWarner Loshtry printfs2 {printf("[%5s][%-5s]\n"), $1, $1} 169*f32a6403SWarner Loshabcd [ abcd][abcd ] 170*f32a6403SWarner Losh现在ab [ 现在ab][现在ab ] 171*f32a6403SWarner Losha现在ab [a现在ab][a现在ab] 172*f32a6403SWarner Losha现在abc [a现在abc][a现在abc] 173*f32a6403SWarner Losh现ωab [ 现ωab][现ωab ] 174*f32a6403SWarner Loshωabc [ ωabc][ωabc ] 175*f32a6403SWarner LoshСейчас [Сейчас][Сейчас] 176*f32a6403SWarner Losh [ ][ ] 177*f32a6403SWarner Losh 178*f32a6403SWarner Losh# printf(%N.Ns) for utf8 strings 179*f32a6403SWarner Loshtry printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1} 180*f32a6403SWarner Loshabcd [ab][ab] 181*f32a6403SWarner Losh现在abc [现在][现在] 
182*f32a6403SWarner Losh现ωabc [现ω][现ω] 183*f32a6403SWarner Loshω [ω][ω] 184*f32a6403SWarner Losh [][] 185*f32a6403SWarner Losh 186*f32a6403SWarner Losh# printf(%c) for utf 187*f32a6403SWarner Loshtry printfc {printf("%c %c\n", $1, substr($1,2,1))} 188*f32a6403SWarner Loshすべての善人のために す べ 189*f32a6403SWarner Loshの今がその時だ の 今 190*f32a6403SWarner LoshСейчас С е 191*f32a6403SWarner Losh现在是时候了 现 在 192*f32a6403SWarner Losh 193*f32a6403SWarner Losh 194*f32a6403SWarner Losh!!!! 195