1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package search
6
7import (
8	"golang.org/x/text/internal/colltab"
9)
10
11// TODO: handle variable primary weights?
12
13func (p *Pattern) deleteEmptyElements() {
14	k := 0
15	for _, e := range p.ce {
16		if !isIgnorable(p.m, e) {
17			p.ce[k] = e
18			k++
19		}
20	}
21	p.ce = p.ce[:k]
22}
23
24func isIgnorable(m *Matcher, e colltab.Elem) bool {
25	if e.Primary() > 0 {
26		return false
27	}
28	if e.Secondary() > 0 {
29		if !m.ignoreDiacritics {
30			return false
31		}
32		// Primary value is 0 and ignoreDiacritics is true. In this case we
33		// ignore the tertiary element, as it only pertains to the modifier.
34		return true
35	}
36	// TODO: further distinguish once we have the new implementation.
37	if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
38		return false
39	}
40	// TODO: we ignore the Quaternary level for now.
41	return true
42}
43
44// TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
45
46func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
47	for start := 0; it.Next(); it.Reset(start) {
48		nextStart := it.End()
49		if end := p.searchOnce(it); end != -1 {
50			return start, end
51		}
52		start = nextStart
53	}
54	return -1, -1
55}
56
57func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
58	if it.Next() {
59		if end := p.searchOnce(it); end != -1 {
60			return 0, end
61		}
62	}
63	return -1, -1
64}
65
66// next advances to the next weight in a pattern. f must return one of the
67// weights of a collation element. next will advance to the first non-zero
68// weight and return this weight and true if it exists, or 0, false otherwise.
69func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
70	for *i < len(p.ce) {
71		v := f(p.ce[*i])
72		*i++
73		if v != 0 {
74			// Skip successive ignorable values.
75			for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
76			}
77			return v, true
78		}
79	}
80	return 0, false
81}
82
83// TODO: remove this function once Elem is internal and Tertiary returns int.
84func tertiary(e colltab.Elem) int {
85	return int(e.Tertiary())
86}
87
88// searchOnce tries to match the pattern s.p at the text position i. s.buf needs
89// to be filled with collation elements of the first segment, where n is the
90// number of source bytes consumed for this segment. It will return the end
91// position of the match or -1.
92func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
93	var pLevel [4]int
94
95	m := p.m
96	for {
97		k := 0
98		for ; k < it.N; k++ {
99			if v := it.Elems[k].Primary(); v > 0 {
100				if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
101					return -1
102				}
103			}
104
105			if !m.ignoreDiacritics {
106				if v := it.Elems[k].Secondary(); v > 0 {
107					if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
108						return -1
109					}
110				}
111			} else if it.Elems[k].Primary() == 0 {
112				// We ignore tertiary values of collation elements of the
113				// secondary level.
114				continue
115			}
116
117			// TODO: distinguish between case and width. This will be easier to
118			// implement after we moved to the new collation implementation.
119			if !m.ignoreWidth && !m.ignoreCase {
120				if v := it.Elems[k].Tertiary(); v > 0 {
121					if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
122						return -1
123					}
124				}
125			}
126			// TODO: check quaternary weight
127		}
128		it.Discard() // Remove the current segment from the buffer.
129
130		// Check for completion.
131		switch {
132		// If any of these cases match, we are not at the end.
133		case pLevel[0] < len(p.ce):
134		case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
135		case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
136		default:
137			// At this point, both the segment and pattern has matched fully.
138			// However, the segment may still be have trailing modifiers.
139			// This can be verified by another call to next.
140			end = it.End()
141			if it.Next() && it.Elems[0].Primary() == 0 {
142				if !m.ignoreDiacritics {
143					return -1
144				}
145				end = it.End()
146			}
147			return end
148		}
149
150		// Fill the buffer with the next batch of collation elements.
151		if !it.Next() {
152			return -1
153		}
154	}
155}
156