1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package rangetable
6
7import (
8	"unicode"
9)
10
11// atEnd is used to mark a completed iteration.
12const atEnd = unicode.MaxRune + 1
13
14// Merge returns a new RangeTable that is the union of the given tables.
15// It can also be used to compact user-created RangeTables. The entries in
16// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
17//
18// A lookup in the resulting table can be several times faster than using In
19// directly on the ranges. Merge is an expensive operation, however, and only
20// makes sense if one intends to use the result for more than a couple of
21// hundred lookups.
22func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
23	rt := &unicode.RangeTable{}
24	if len(ranges) == 0 {
25		return rt
26	}
27
28	iter := tablesIter(make([]tableIndex, len(ranges)))
29
30	for i, t := range ranges {
31		iter[i] = tableIndex{t, 0, atEnd}
32		if len(t.R16) > 0 {
33			iter[i].next = rune(t.R16[0].Lo)
34		}
35	}
36
37	if r0 := iter.next16(); r0.Stride != 0 {
38		for {
39			r1 := iter.next16()
40			if r1.Stride == 0 {
41				rt.R16 = append(rt.R16, r0)
42				break
43			}
44			stride := r1.Lo - r0.Hi
45			if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
46				// Fully merge the next range into the previous one.
47				r0.Hi, r0.Stride = r1.Hi, stride
48				continue
49			} else if stride == r0.Stride {
50				// Move the first element of r1 to r0. This may eliminate an
51				// entry.
52				r0.Hi = r1.Lo
53				r0.Stride = stride
54				r1.Lo = r1.Lo + r1.Stride
55				if r1.Lo > r1.Hi {
56					continue
57				}
58			}
59			rt.R16 = append(rt.R16, r0)
60			r0 = r1
61		}
62	}
63
64	for i, t := range ranges {
65		iter[i] = tableIndex{t, 0, atEnd}
66		if len(t.R32) > 0 {
67			iter[i].next = rune(t.R32[0].Lo)
68		}
69	}
70
71	if r0 := iter.next32(); r0.Stride != 0 {
72		for {
73			r1 := iter.next32()
74			if r1.Stride == 0 {
75				rt.R32 = append(rt.R32, r0)
76				break
77			}
78			stride := r1.Lo - r0.Hi
79			if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
80				// Fully merge the next range into the previous one.
81				r0.Hi, r0.Stride = r1.Hi, stride
82				continue
83			} else if stride == r0.Stride {
84				// Move the first element of r1 to r0. This may eliminate an
85				// entry.
86				r0.Hi = r1.Lo
87				r1.Lo = r1.Lo + r1.Stride
88				if r1.Lo > r1.Hi {
89					continue
90				}
91			}
92			rt.R32 = append(rt.R32, r0)
93			r0 = r1
94		}
95	}
96
97	for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
98		rt.LatinOffset = i + 1
99	}
100
101	return rt
102}
103
104type tableIndex struct {
105	t    *unicode.RangeTable
106	p    uint32
107	next rune
108}
109
110type tablesIter []tableIndex
111
112// sortIter does an insertion sort using the next field of tableIndex. Insertion
113// sort is a good sorting algorithm for this case.
114func sortIter(t []tableIndex) {
115	for i := range t {
116		for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
117			t[j], t[j-1] = t[j-1], t[j]
118		}
119	}
120}
121
122// next16 finds the ranged to be added to the table. If ranges overlap between
123// multiple tables it clips the result to a non-overlapping range if the
124// elements are not fully subsumed. It returns a zero range if there are no more
125// ranges.
126func (ti tablesIter) next16() unicode.Range16 {
127	sortIter(ti)
128
129	t0 := ti[0]
130	if t0.next == atEnd {
131		return unicode.Range16{}
132	}
133	r0 := t0.t.R16[t0.p]
134	r0.Lo = uint16(t0.next)
135
136	// We restrict the Hi of the current range if it overlaps with another range.
137	for i := range ti {
138		tn := ti[i]
139		// Since our tableIndices are sorted by next, we can break if the there
140		// is no overlap. The first value of a next range can always be merged
141		// into the current one, so we can break in case of equality as well.
142		if rune(r0.Hi) <= tn.next {
143			break
144		}
145		rn := tn.t.R16[tn.p]
146		rn.Lo = uint16(tn.next)
147
148		// Limit r0.Hi based on next ranges in list, but allow it to overlap
149		// with ranges as long as it subsumes it.
150		m := (rn.Lo - r0.Lo) % r0.Stride
151		if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
152			// Overlap, take the min of the two Hi values: for simplicity's sake
153			// we only process one range at a time.
154			if r0.Hi > rn.Hi {
155				r0.Hi = rn.Hi
156			}
157		} else {
158			// Not a compatible stride. Set to the last possible value before
159			// rn.Lo, but ensure there is at least one value.
160			if x := rn.Lo - m; r0.Lo <= x {
161				r0.Hi = x
162			}
163			break
164		}
165	}
166
167	// Update the next values for each table.
168	for i := range ti {
169		tn := &ti[i]
170		if rune(r0.Hi) < tn.next {
171			break
172		}
173		rn := tn.t.R16[tn.p]
174		stride := rune(rn.Stride)
175		tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
176		if rune(rn.Hi) < tn.next {
177			if tn.p++; int(tn.p) == len(tn.t.R16) {
178				tn.next = atEnd
179			} else {
180				tn.next = rune(tn.t.R16[tn.p].Lo)
181			}
182		}
183	}
184
185	if r0.Lo == r0.Hi {
186		r0.Stride = 1
187	}
188
189	return r0
190}
191
192// next32 finds the ranged to be added to the table. If ranges overlap between
193// multiple tables it clips the result to a non-overlapping range if the
194// elements are not fully subsumed. It returns a zero range if there are no more
195// ranges.
196func (ti tablesIter) next32() unicode.Range32 {
197	sortIter(ti)
198
199	t0 := ti[0]
200	if t0.next == atEnd {
201		return unicode.Range32{}
202	}
203	r0 := t0.t.R32[t0.p]
204	r0.Lo = uint32(t0.next)
205
206	// We restrict the Hi of the current range if it overlaps with another range.
207	for i := range ti {
208		tn := ti[i]
209		// Since our tableIndices are sorted by next, we can break if the there
210		// is no overlap. The first value of a next range can always be merged
211		// into the current one, so we can break in case of equality as well.
212		if rune(r0.Hi) <= tn.next {
213			break
214		}
215		rn := tn.t.R32[tn.p]
216		rn.Lo = uint32(tn.next)
217
218		// Limit r0.Hi based on next ranges in list, but allow it to overlap
219		// with ranges as long as it subsumes it.
220		m := (rn.Lo - r0.Lo) % r0.Stride
221		if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
222			// Overlap, take the min of the two Hi values: for simplicity's sake
223			// we only process one range at a time.
224			if r0.Hi > rn.Hi {
225				r0.Hi = rn.Hi
226			}
227		} else {
228			// Not a compatible stride. Set to the last possible value before
229			// rn.Lo, but ensure there is at least one value.
230			if x := rn.Lo - m; r0.Lo <= x {
231				r0.Hi = x
232			}
233			break
234		}
235	}
236
237	// Update the next values for each table.
238	for i := range ti {
239		tn := &ti[i]
240		if rune(r0.Hi) < tn.next {
241			break
242		}
243		rn := tn.t.R32[tn.p]
244		stride := rune(rn.Stride)
245		tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
246		if rune(rn.Hi) < tn.next {
247			if tn.p++; int(tn.p) == len(tn.t.R32) {
248				tn.next = atEnd
249			} else {
250				tn.next = rune(tn.t.R32[tn.p].Lo)
251			}
252		}
253	}
254
255	if r0.Lo == r0.Hi {
256		r0.Stride = 1
257	}
258
259	return r0
260}
261