1package textseg 2 3import ( 4 "fmt" 5 "reflect" 6 "strings" 7 "testing" 8 "unicode/utf8" 9) 10 11func TestScanGraphemeClusters(t *testing.T) { 12 tests := unicodeGraphemeTests 13 14 for i, test := range tests { 15 t.Run(fmt.Sprintf("%03d-%x", i, test.input), func(t *testing.T) { 16 got, err := AllTokens(test.input, ScanGraphemeClusters) 17 18 if err != nil { 19 t.Fatalf("unexpected error: %s", err) 20 } 21 22 if !reflect.DeepEqual(got, test.output) { 23 // Also get the rune values resulting from decoding utf8, 24 // since they are generally easier to look up to figure out 25 // what's failing. 26 runes := make([]string, 0, len(test.input)) 27 seqs := make([][]byte, 0, len(test.input)) 28 categories := make([]string, 0, len(test.input)) 29 buf := test.input 30 for len(buf) > 0 { 31 r, size := utf8.DecodeRune(buf) 32 runes = append(runes, fmt.Sprintf("0x%04x", r)) 33 seqs = append(seqs, buf[:size]) 34 categories = append(categories, _GraphemeRuneType(r).String()) 35 buf = buf[size:] 36 } 37 38 t.Errorf( 39 "wrong result\ninput: %s\nutf8s: %s\nrunes: %s\ncats: %s\ngot: %s\nwant: %s", 40 formatBytes(test.input), 41 formatByteRanges(seqs), 42 strings.Join(runes, " "), 43 strings.Join(categories, " "), 44 formatByteRanges(got), 45 formatByteRanges(test.output), 46 ) 47 } 48 }) 49 } 50} 51 52func formatBytes(buf []byte) string { 53 strs := make([]string, len(buf)) 54 for i, b := range buf { 55 strs[i] = fmt.Sprintf("0x%02x", b) 56 } 57 return strings.Join(strs, " ") 58} 59 60func formatByteRanges(bufs [][]byte) string { 61 strs := make([]string, len(bufs)) 62 for i, b := range bufs { 63 strs[i] = formatBytes(b) 64 } 65 return strings.Join(strs, " | ") 66} 67