1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate stringer -type=Kind
6//go:generate go run gen.go gen_common.go gen_trieval.go
7
8// Package width provides functionality for handling different widths in text.
9//
10// Wide characters behave like ideographs; they tend to allow line breaks after
11// each character and remain upright in vertical text layout. Narrow characters
12// are kept together in words or runs that are rotated sideways in vertical text
13// layout.
14//
15// For more information, see https://unicode.org/reports/tr11/.
16package width // import "golang.org/x/text/width"
17
18import (
19	"unicode/utf8"
20
21	"golang.org/x/text/transform"
22)
23
24// TODO
25// 1) Reduce table size by compressing blocks.
26// 2) API proposition for computing display length
27//    (approximation, fixed pitch only).
28// 3) Implement display length.
29
// Kind indicates the type of width property as defined in
// https://unicode.org/reports/tr11/.
//
// The zero value of Kind is Neutral. A String method for Kind is produced by
// the stringer go:generate directive at the top of this file.
type Kind int
32
const (
	// Neutral characters do not occur in legacy East Asian character sets.
	Neutral Kind = iota

	// EastAsianAmbiguous characters can be sometimes wide and sometimes
	// narrow and require additional information not contained in the character
	// code to further resolve their width.
	EastAsianAmbiguous

	// EastAsianWide characters are wide in their usual form. They occur only
	// in the context of East Asian typography. These runes may have explicit
	// halfwidth counterparts.
	EastAsianWide

	// EastAsianNarrow characters are narrow in their usual form. They often
	// have fullwidth counterparts.
	EastAsianNarrow

	// Note: there exist Narrow runes that do not have fullwidth or wide
	// counterparts, despite what the definition says (e.g. U+27E6).

	// EastAsianFullwidth characters have a compatibility decomposition of type
	// wide that maps to a narrow counterpart.
	EastAsianFullwidth

	// EastAsianHalfwidth characters have a compatibility decomposition of type
	// narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
	// SIGN.
	EastAsianHalfwidth

	// Note: there exist runes that have halfwidth counterparts but that are
	// classified as Ambiguous, rather than wide (e.g. U+2190).
)
66
67// TODO: the generated tries need to return size 1 for invalid runes for the
68// width to be computed correctly (each byte should render width 1)
69
// trie is the package-level trie queried by Lookup, LookupString and
// LookupRune. It is created once, during package initialization, by
// newWidthTrie.
var trie = newWidthTrie(0)
71
72// Lookup reports the Properties of the first rune in b and the number of bytes
73// of its UTF-8 encoding.
74func Lookup(b []byte) (p Properties, size int) {
75	v, sz := trie.lookup(b)
76	return Properties{elem(v), b[sz-1]}, sz
77}
78
79// LookupString reports the Properties of the first rune in s and the number of
80// bytes of its UTF-8 encoding.
81func LookupString(s string) (p Properties, size int) {
82	v, sz := trie.lookupString(s)
83	return Properties{elem(v), s[sz-1]}, sz
84}
85
86// LookupRune reports the Properties of rune r.
87func LookupRune(r rune) Properties {
88	var buf [4]byte
89	n := utf8.EncodeRune(buf[:], r)
90	v, _ := trie.lookup(buf[:n])
91	last := byte(r)
92	if r >= utf8.RuneSelf {
93		last = 0x80 + byte(r&0x3f)
94	}
95	return Properties{elem(v), last}
96}
97
// Properties provides access to width properties of a rune.
type Properties struct {
	// elem is the value obtained from the trie for the rune.
	elem elem
	// last is the final byte of the rune's UTF-8 encoding. The Folded, Narrow
	// and Wide methods XOR it into a stored mapping to recover the variant.
	last byte
}
103
104func (e elem) kind() Kind {
105	return Kind(e >> typeShift)
106}
107
108// Kind returns the Kind of a rune as defined in Unicode TR #11.
109// See https://unicode.org/reports/tr11/ for more details.
110func (p Properties) Kind() Kind {
111	return p.elem.kind()
112}
113
114// Folded returns the folded variant of a rune or 0 if the rune is canonical.
115func (p Properties) Folded() rune {
116	if p.elem&tagNeedsFold != 0 {
117		buf := inverseData[byte(p.elem)]
118		buf[buf[0]] ^= p.last
119		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
120		return r
121	}
122	return 0
123}
124
125// Narrow returns the narrow variant of a rune or 0 if the rune is already
126// narrow or doesn't have a narrow variant.
127func (p Properties) Narrow() rune {
128	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
129		buf := inverseData[byte(p.elem)]
130		buf[buf[0]] ^= p.last
131		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
132		return r
133	}
134	return 0
135}
136
137// Wide returns the wide variant of a rune or 0 if the rune is already
138// wide or doesn't have a wide variant.
139func (p Properties) Wide() rune {
140	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
141		buf := inverseData[byte(p.elem)]
142		buf[buf[0]] ^= p.last
143		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
144		return r
145	}
146	return 0
147}
148
149// TODO for Properties:
150// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
151// mapping.
152// - Add width information (including information on non-spacing runes).
153
// Transformer implements the transform.Transformer interface. It also
// provides a Span method, as required by transform.SpanningTransformer.
type Transformer struct {
	// t is the wrapped transformer to which Reset, Transform and Span
	// delegate.
	t transform.SpanningTransformer
}
158
159// Reset implements the transform.Transformer interface.
160func (t Transformer) Reset() { t.t.Reset() }
161
162// Transform implements the transform.Transformer interface.
163func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
164	return t.t.Transform(dst, src, atEOF)
165}
166
167// Span implements the transform.SpanningTransformer interface.
168func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
169	return t.t.Span(src, atEOF)
170}
171
172// Bytes returns a new byte slice with the result of applying t to b.
173func (t Transformer) Bytes(b []byte) []byte {
174	b, _, _ = transform.Bytes(t, b)
175	return b
176}
177
178// String returns a string with the result of applying t to s.
179func (t Transformer) String(s string) string {
180	s, _, _ = transform.String(t, s)
181	return s
182}
183
184var (
185	// Fold is a transform that maps all runes to their canonical width.
186	//
187	// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
188	// provide a more generic folding mechanism.
189	Fold Transformer = Transformer{foldTransform{}}
190
191	// Widen is a transform that maps runes to their wide variant, if
192	// available.
193	Widen Transformer = Transformer{wideTransform{}}
194
195	// Narrow is a transform that maps runes to their narrow variant, if
196	// available.
197	Narrow Transformer = Transformer{narrowTransform{}}
198)
199
200// TODO: Consider the following options:
201// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
202//   generalized variant of this.
203// - Consider a wide Won character to be the default width (or some generalized
204//   variant of this).
205// - Filter the set of characters that gets converted (the preferred approach is
206//   to allow applying filters to transforms).
207