1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go gen_common.go
6
7// Package plural provides utilities for handling linguistic plurals in text.
8//
9// The definitions in this package are based on the plural rule handling defined
10// in CLDR. See
11// https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules for
12// details.
13package plural
14
15import (
16	"golang.org/x/text/internal/language/compact"
17	"golang.org/x/text/internal/number"
18	"golang.org/x/text/language"
19)
20
// Rules defines the plural rules for all languages for a certain plural type.
//
// This package is UNDER CONSTRUCTION and its API may change.
type Rules struct {
	// rules holds the plural checks of all languages back to back. The checks
	// of a single language are the sub-slice rules[index[k]:index[k+1]].
	rules          []pluralCheck
	// index holds the boundaries, within rules, of each language's checks.
	index          []byte
	// langToIndex maps a compact language ID to a position k in index.
	langToIndex    []byte
	// inclusionMasks is indexed by an operand value modulo maxMod. Bit setID
	// of an entry is tested to decide whether a check's value set includes
	// that operand value.
	inclusionMasks []uint64
}
31
32var (
33	// Cardinal defines the plural rules for numbers indicating quantities.
34	Cardinal *Rules = cardinal
35
36	// Ordinal defines the plural rules for numbers indicating position
37	// (first, second, etc.).
38	Ordinal *Rules = ordinal
39
40	ordinal = &Rules{
41		ordinalRules,
42		ordinalIndex,
43		ordinalLangToIndex,
44		ordinalInclusionMasks[:],
45	}
46
47	cardinal = &Rules{
48		cardinalRules,
49		cardinalIndex,
50		cardinalLangToIndex,
51		cardinalInclusionMasks[:],
52	}
53)
54
// getIntApprox reads digits[start:end] as a decimal integer that is only
// tracked exactly while it fits in nMod digits:
//   - Let i be asInt(digits[start:end]), where positions that fall outside of
//     digits are taken to be zero.
//   - If i has a non-zero digit above its nMod least-significant digits (that
//     is, i >= 10^nMod), the result is big.
//   - Otherwise the result is i itself.
//
// For example, if digits is {1, 2, 3} and start:end is 0:5, i is 12300 and the
// result for various values of nMod is:
//   - when nMod == 2, n == big
//   - when nMod == 3, n == big
//   - when nMod == 4, n == big
//   - when nMod == 5, n == 12300
//   - when nMod == 6, n == 12300
//   - when nMod == 7, n == 12300
func getIntApprox(digits []byte, start, end, nMod, big int) (n int) {
	// Positions before the first stored digit are implicit zeros and
	// contribute nothing, so begin at the first stored digit.
	pos := start
	if pos < 0 {
		pos = 0
	}
	// Positions at or beyond avail are implicit trailing zeros.
	avail := len(digits)
	if end < avail {
		avail = end
	}
	// Stored digits in front of the nMod least-significant positions must all
	// be zero; otherwise the value is too large to track exactly. The bound is
	// at most zero when there are no such positions, so the loop does not run.
	high := end - nMod
	if high > avail {
		high = avail
	}
	for pos < high {
		if digits[pos] != 0 {
			return big
		}
		pos++
	}
	// Accumulate the remaining stored digits.
	for pos < avail {
		n = n*10 + int(digits[pos])
		pos++
	}
	// Scale for the trailing zeros that were omitted from digits.
	for pos < end {
		n *= 10
		pos++
	}
	return n
}
101
102// MatchDigits computes the plural form for the given language and the given
103// decimal floating point digits. The digits are stored in big-endian order and
104// are of value byte(0) - byte(9). The floating point position is indicated by
105// exp and the number of visible decimals is scale. All leading and trailing
106// zeros may be omitted from digits.
107//
108// The following table contains examples of possible arguments to represent
109// the given numbers.
110//      decimal    digits              exp    scale
111//      123        []byte{1, 2, 3}     3      0
112//      123.4      []byte{1, 2, 3, 4}  3      1
113//      123.40     []byte{1, 2, 3, 4}  3      2
114//      100000     []byte{1}           6      0
115//      100000.00  []byte{1}           6      3
116func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form {
117	index := tagToID(t)
118
119	// Differentiate up to including mod 1000000 for the integer part.
120	n := getIntApprox(digits, 0, exp, 6, 1000000)
121
122	// Differentiate up to including mod 100 for the fractional part.
123	f := getIntApprox(digits, exp, exp+scale, 2, 100)
124
125	return matchPlural(p, index, n, f, scale)
126}
127
128func (p *Rules) matchDisplayDigits(t language.Tag, d *number.Digits) (Form, int) {
129	n := getIntApprox(d.Digits, 0, int(d.Exp), 6, 1000000)
130	return p.MatchDigits(t, d.Digits, int(d.Exp), d.NumFracDigits()), n
131}
132
133func validForms(p *Rules, t language.Tag) (forms []Form) {
134	offset := p.langToIndex[tagToID(t)]
135	rules := p.rules[p.index[offset]:p.index[offset+1]]
136
137	forms = append(forms, Other)
138	last := Other
139	for _, r := range rules {
140		if cat := Form(r.cat & formMask); cat != andNext && last != cat {
141			forms = append(forms, cat)
142			last = cat
143		}
144	}
145	return forms
146}
147
148func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form {
149	return matchPlural(p, tagToID(t), n, f, scale)
150}
151
152// MatchPlural returns the plural form for the given language and plural
153// operands (as defined in
154// https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules):
155//  where
156//  	n  absolute value of the source number (integer and decimals)
157//  input
158//  	i  integer digits of n.
159//  	v  number of visible fraction digits in n, with trailing zeros.
160//  	w  number of visible fraction digits in n, without trailing zeros.
161//  	f  visible fractional digits in n, with trailing zeros (f = t * 10^(v-w))
162//  	t  visible fractional digits in n, without trailing zeros.
163//
164// If any of the operand values is too large to fit in an int, it is okay to
165// pass the value modulo 10,000,000.
166func (p *Rules) MatchPlural(lang language.Tag, i, v, w, f, t int) Form {
167	return matchPlural(p, tagToID(lang), i, f, v)
168}
169
// matchPlural returns the plural form that the rules of p select for the
// language with compact ID index, given the integer part n, the visible
// fraction digits f and the number of visible fraction digits v.
//
// The rules of a language are a list of checks. Each check carries an
// operation (the bits of cat above opShift), a set ID that selects one bit in
// the inclusion masks, and a plural form (cat & formMask). A check whose form
// is andNext is chained to the check that follows it: the form of the last
// check of a chain is returned only if every check of the chain passes. Other
// is returned if no chain passes.
//
// NOTE(review): n, f and v are assumed to be non-negative; with a positive
// maxMod a negative operand would produce a negative index into
// inclusionMasks.
func matchPlural(p *Rules, index compact.ID, n, f, v int) Form {
	// The inclusion masks are indexed by the operand value modulo maxMod.
	nMask := p.inclusionMasks[n%maxMod]
	// Compute the fMask inline in the rules below, as it is relatively rare.
	// fMask := p.inclusionMasks[f%maxMod]
	vMask := p.inclusionMasks[v%maxMod]

	// Do the matching
	offset := p.langToIndex[index]
	rules := p.rules[p.index[offset]:p.index[offset+1]]
	for i := 0; i < len(rules); i++ {
		rule := rules[i]
		// setBit is the mask bit that stands for this check's value set.
		setBit := uint64(1 << rule.setID)
		// skip records that this check failed. It stays false, so the check
		// counts as passed, for an operation that has no case below.
		var skip bool
		switch op := opID(rule.cat >> opShift); op {
		// Because the masks are indexed modulo maxMod, the exact (non-modulo)
		// comparisons on i, n and f also compare the operand against numN.
		case opI: // i = x
			skip = n >= numN || nMask&setBit == 0

		case opI | opNotEqual: // i != x
			skip = n < numN && nMask&setBit != 0

		case opI | opMod: // i % m = x
			skip = nMask&setBit == 0

		case opI | opMod | opNotEqual: // i % m != x
			skip = nMask&setBit != 0

		// The n operand covers integer and decimals, so the equality checks on
		// n can only pass when there are no fraction digits (f == 0).
		case opN: // n = x
			skip = f != 0 || n >= numN || nMask&setBit == 0

		case opN | opNotEqual: // n != x
			skip = f == 0 && n < numN && nMask&setBit != 0

		case opN | opMod: // n % m = x
			skip = f != 0 || nMask&setBit == 0

		case opN | opMod | opNotEqual: // n % m != x
			skip = f == 0 && nMask&setBit != 0

		case opF: // f = x
			skip = f >= numN || p.inclusionMasks[f%maxMod]&setBit == 0

		case opF | opNotEqual: // f != x
			skip = f < numN && p.inclusionMasks[f%maxMod]&setBit != 0

		case opF | opMod: // f % m = x
			skip = p.inclusionMasks[f%maxMod]&setBit == 0

		case opF | opMod | opNotEqual: // f % m != x
			skip = p.inclusionMasks[f%maxMod]&setBit != 0

		// NOTE(review): unlike "i = x" above, "v = x" never skips when
		// v >= numN; confirm that this is intended.
		case opV: // v = x
			skip = v < numN && vMask&setBit == 0

		case opV | opNotEqual: // v != x
			skip = v < numN && vMask&setBit != 0

		// w is not passed in; f is zero exactly when there are no non-zero
		// fraction digits, so f stands in for it.
		case opW: // w == 0
			skip = f != 0

		case opW | opNotEqual: // w != 0
			skip = f == 0

		// Hard-wired rules that cannot be handled by our algorithm.

		case opBretonM:
			// Passes only for non-zero integers that are a multiple of 1000000.
			skip = f != 0 || n == 0 || n%1000000 != 0

		case opAzerbaijan00s:
			// 100,200,300,400,500,600,700,800,900
			skip = n == 0 || n >= 1000 || n%100 != 0

		case opItalian800:
			// Same test as "n = x", except that an n of 800 always passes.
			skip = (f != 0 || n >= numN || nMask&setBit == 0) && n != 800
		}
		if skip {
			// advance over AND entries.
			// The loop stops on the final entry of the failed chain; the i++
			// triggered by continue then steps past that entry as well.
			for ; i < len(rules) && rules[i].cat&formMask == andNext; i++ {
			}
			continue
		}
		// return if we have a final entry.
		// An andNext entry that passed falls through to the next check of its
		// chain instead.
		if cat := rule.cat & formMask; cat != andNext {
			return Form(cat)
		}
	}
	return Other
}
257
258func tagToID(t language.Tag) compact.ID {
259	id, _ := compact.RegionalID(compact.Tag(t))
260	return id
261}
262