1/*
2Copyright 2015 The Perkeep Authors
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8     http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17// Package bytereplacer provides a utility for replacing parts of byte slices.
18package bytereplacer // import "go4.org/bytereplacer"
19
20import "bytes"
21
22// Replacer replaces a list of strings with replacements.
23// It is safe for concurrent use by multiple goroutines.
24type Replacer struct {
25	r replacer
26}
27
28// replacer is the interface that a replacement algorithm needs to implement.
29type replacer interface {
30	// Replace performs all replacements, in-place if possible.
31	Replace(s []byte) []byte
32}
33
34// New returns a new Replacer from a list of old, new string pairs.
35// Replacements are performed in order, without overlapping matches.
36func New(oldnew ...string) *Replacer {
37	if len(oldnew)%2 == 1 {
38		panic("bytes.NewReplacer: odd argument count")
39	}
40
41	allNewBytes := true
42	for i := 0; i < len(oldnew); i += 2 {
43		if len(oldnew[i]) != 1 {
44			return &Replacer{r: makeGenericReplacer(oldnew)}
45		}
46		if len(oldnew[i+1]) != 1 {
47			allNewBytes = false
48		}
49	}
50
51	if allNewBytes {
52		r := byteReplacer{}
53		for i := range r {
54			r[i] = byte(i)
55		}
56		// The first occurrence of old->new map takes precedence
57		// over the others with the same old string.
58		for i := len(oldnew) - 2; i >= 0; i -= 2 {
59			o := oldnew[i][0]
60			n := oldnew[i+1][0]
61			r[o] = n
62		}
63		return &Replacer{r: &r}
64	}
65
66	return &Replacer{r: makeGenericReplacer(oldnew)}
67}
68
69// Replace performs all replacements in-place on s. If the capacity
70// of s is not sufficient, a new slice is allocated, otherwise Replace
71// returns s.
72func (r *Replacer) Replace(s []byte) []byte {
73	return r.r.Replace(s)
74}
75
76type trieNode struct {
77	value    []byte
78	priority int
79	prefix   []byte
80	next     *trieNode
81	table    []*trieNode
82}
83
84func (t *trieNode) add(key, val []byte, priority int, r *genericReplacer) {
85	if len(key) == 0 {
86		if t.priority == 0 {
87			t.value = val
88			t.priority = priority
89		}
90		return
91	}
92
93	if len(t.prefix) > 0 {
94		// Need to split the prefix among multiple nodes.
95		var n int // length of the longest common prefix
96		for ; n < len(t.prefix) && n < len(key); n++ {
97			if t.prefix[n] != key[n] {
98				break
99			}
100		}
101		if n == len(t.prefix) {
102			t.next.add(key[n:], val, priority, r)
103		} else if n == 0 {
104			// First byte differs, start a new lookup table here. Looking up
105			// what is currently t.prefix[0] will lead to prefixNode, and
106			// looking up key[0] will lead to keyNode.
107			var prefixNode *trieNode
108			if len(t.prefix) == 1 {
109				prefixNode = t.next
110			} else {
111				prefixNode = &trieNode{
112					prefix: t.prefix[1:],
113					next:   t.next,
114				}
115			}
116			keyNode := new(trieNode)
117			t.table = make([]*trieNode, r.tableSize)
118			t.table[r.mapping[t.prefix[0]]] = prefixNode
119			t.table[r.mapping[key[0]]] = keyNode
120			t.prefix = nil
121			t.next = nil
122			keyNode.add(key[1:], val, priority, r)
123		} else {
124			// Insert new node after the common section of the prefix.
125			next := &trieNode{
126				prefix: t.prefix[n:],
127				next:   t.next,
128			}
129			t.prefix = t.prefix[:n]
130			t.next = next
131			next.add(key[n:], val, priority, r)
132		}
133	} else if t.table != nil {
134		// Insert into existing table.
135		m := r.mapping[key[0]]
136		if t.table[m] == nil {
137			t.table[m] = new(trieNode)
138		}
139		t.table[m].add(key[1:], val, priority, r)
140	} else {
141		t.prefix = key
142		t.next = new(trieNode)
143		t.next.add(nil, val, priority, r)
144	}
145}
146
147func (r *genericReplacer) lookup(s []byte, ignoreRoot bool) (val []byte, keylen int, found bool) {
148	// Iterate down the trie to the end, and grab the value and keylen with
149	// the highest priority.
150	bestPriority := 0
151	node := &r.root
152	n := 0
153	for node != nil {
154		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
155			bestPriority = node.priority
156			val = node.value
157			keylen = n
158			found = true
159		}
160
161		if len(s) == 0 {
162			break
163		}
164		if node.table != nil {
165			index := r.mapping[s[0]]
166			if int(index) == r.tableSize {
167				break
168			}
169			node = node.table[index]
170			s = s[1:]
171			n++
172		} else if len(node.prefix) > 0 && bytes.HasPrefix(s, node.prefix) {
173			n += len(node.prefix)
174			s = s[len(node.prefix):]
175			node = node.next
176		} else {
177			break
178		}
179	}
180	return
181}
182
183// genericReplacer is the fully generic algorithm.
184// It's used as a fallback when nothing faster can be used.
185type genericReplacer struct {
186	root trieNode
187	// tableSize is the size of a trie node's lookup table. It is the number
188	// of unique key bytes.
189	tableSize int
190	// mapping maps from key bytes to a dense index for trieNode.table.
191	mapping [256]byte
192}
193
194func makeGenericReplacer(oldnew []string) *genericReplacer {
195	r := new(genericReplacer)
196	// Find each byte used, then assign them each an index.
197	for i := 0; i < len(oldnew); i += 2 {
198		key := oldnew[i]
199		for j := 0; j < len(key); j++ {
200			r.mapping[key[j]] = 1
201		}
202	}
203
204	for _, b := range r.mapping {
205		r.tableSize += int(b)
206	}
207
208	var index byte
209	for i, b := range r.mapping {
210		if b == 0 {
211			r.mapping[i] = byte(r.tableSize)
212		} else {
213			r.mapping[i] = index
214			index++
215		}
216	}
217	// Ensure root node uses a lookup table (for performance).
218	r.root.table = make([]*trieNode, r.tableSize)
219
220	for i := 0; i < len(oldnew); i += 2 {
221		r.root.add([]byte(oldnew[i]), []byte(oldnew[i+1]), len(oldnew)-i, r)
222	}
223	return r
224}
225
226func (r *genericReplacer) Replace(s []byte) []byte {
227	var last int
228	var prevMatchEmpty bool
229	dst := s[:0]
230	grown := false
231	for i := 0; i <= len(s); {
232		// Fast path: s[i] is not a prefix of any pattern.
233		if i != len(s) && r.root.priority == 0 {
234			index := int(r.mapping[s[i]])
235			if index == r.tableSize || r.root.table[index] == nil {
236				i++
237				continue
238			}
239		}
240
241		// Ignore the empty match iff the previous loop found the empty match.
242		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
243		prevMatchEmpty = match && keylen == 0
244		if match {
245			dst = append(dst, s[last:i]...)
246			if diff := len(val) - keylen; grown || diff < 0 {
247				dst = append(dst, val...)
248				i += keylen
249			} else if diff <= cap(s)-len(s) {
250				// The replacement is larger than the original, but can still fit in the original buffer.
251				copy(s[i+len(val):cap(dst)], s[i+keylen:])
252				dst = append(dst, val...)
253				s = s[:len(s)+diff]
254				i += len(val)
255			} else {
256				// The output will grow larger than the original buffer.  Allocate a new one.
257				grown = true
258				newDst := make([]byte, len(dst), cap(dst)+diff)
259				copy(newDst, dst)
260				dst = newDst
261
262				dst = append(dst, val...)
263				i += keylen
264			}
265			last = i
266			continue
267		}
268		i++
269	}
270	if last != len(s) {
271		dst = append(dst, s[last:]...)
272	}
273	return dst
274}
275
276// byteReplacer is the implementation that's used when all the "old"
277// and "new" values are single ASCII bytes.
278// The array contains replacement bytes indexed by old byte.
279type byteReplacer [256]byte
280
281func (r *byteReplacer) Replace(s []byte) []byte {
282	for i, b := range s {
283		s[i] = r[b]
284	}
285	return s
286}
287