1package textseg
2
3import (
4	"fmt"
5	"reflect"
6	"strings"
7	"testing"
8	"unicode/utf8"
9)
10
11func TestScanGraphemeClusters(t *testing.T) {
12	tests := unicodeGraphemeTests
13
14	for i, test := range tests {
15		t.Run(fmt.Sprintf("%03d-%x", i, test.input), func(t *testing.T) {
16			got, err := AllTokens(test.input, ScanGraphemeClusters)
17
18			if err != nil {
19				t.Fatalf("unexpected error: %s", err)
20			}
21
22			if !reflect.DeepEqual(got, test.output) {
23				// Also get the rune values resulting from decoding utf8,
24				// since they are generally easier to look up to figure out
25				// what's failing.
26				runes := make([]string, 0, len(test.input))
27				seqs := make([][]byte, 0, len(test.input))
28				categories := make([]string, 0, len(test.input))
29				buf := test.input
30				for len(buf) > 0 {
31					r, size := utf8.DecodeRune(buf)
32					runes = append(runes, fmt.Sprintf("0x%04x", r))
33					seqs = append(seqs, buf[:size])
34					categories = append(categories, _GraphemeRuneType(r).String())
35					buf = buf[size:]
36				}
37
38				t.Errorf(
39					"wrong result\ninput: %s\nutf8s: %s\nrunes: %s\ncats:  %s\ngot:   %s\nwant:  %s",
40					formatBytes(test.input),
41					formatByteRanges(seqs),
42					strings.Join(runes, " "),
43					strings.Join(categories, " "),
44					formatByteRanges(got),
45					formatByteRanges(test.output),
46				)
47			}
48		})
49	}
50}
51
// formatBytes renders each byte of buf as a two-digit, zero-padded lowercase
// hex literal with a "0x" prefix and joins the literals with three spaces.
// An empty or nil buf yields the empty string.
func formatBytes(buf []byte) string {
	var out strings.Builder
	for idx, octet := range buf {
		if idx > 0 {
			// Separator goes between entries only, never before the
			// first or after the last.
			out.WriteString("   ")
		}
		fmt.Fprintf(&out, "0x%02x", octet)
	}
	return out.String()
}
59
60func formatByteRanges(bufs [][]byte) string {
61	strs := make([]string, len(bufs))
62	for i, b := range bufs {
63		strs[i] = formatBytes(b)
64	}
65	return strings.Join(strs, " | ")
66}
67