1/* 2Copyright 2015 The Perkeep Authors 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17// Package bytereplacer provides a utility for replacing parts of byte slices. 18package bytereplacer // import "go4.org/bytereplacer" 19 20import "bytes" 21 22// Replacer replaces a list of strings with replacements. 23// It is safe for concurrent use by multiple goroutines. 24type Replacer struct { 25 r replacer 26} 27 28// replacer is the interface that a replacement algorithm needs to implement. 29type replacer interface { 30 // Replace performs all replacements, in-place if possible. 31 Replace(s []byte) []byte 32} 33 34// New returns a new Replacer from a list of old, new string pairs. 35// Replacements are performed in order, without overlapping matches. 36func New(oldnew ...string) *Replacer { 37 if len(oldnew)%2 == 1 { 38 panic("bytes.NewReplacer: odd argument count") 39 } 40 41 allNewBytes := true 42 for i := 0; i < len(oldnew); i += 2 { 43 if len(oldnew[i]) != 1 { 44 return &Replacer{r: makeGenericReplacer(oldnew)} 45 } 46 if len(oldnew[i+1]) != 1 { 47 allNewBytes = false 48 } 49 } 50 51 if allNewBytes { 52 r := byteReplacer{} 53 for i := range r { 54 r[i] = byte(i) 55 } 56 // The first occurrence of old->new map takes precedence 57 // over the others with the same old string. 58 for i := len(oldnew) - 2; i >= 0; i -= 2 { 59 o := oldnew[i][0] 60 n := oldnew[i+1][0] 61 r[o] = n 62 } 63 return &Replacer{r: &r} 64 } 65 66 return &Replacer{r: makeGenericReplacer(oldnew)} 67} 68 69// Replace performs all replacements in-place on s. If the capacity 70// of s is not sufficient, a new slice is allocated, otherwise Replace 71// returns s. 72func (r *Replacer) Replace(s []byte) []byte { 73 return r.r.Replace(s) 74} 75 76type trieNode struct { 77 value []byte 78 priority int 79 prefix []byte 80 next *trieNode 81 table []*trieNode 82} 83 84func (t *trieNode) add(key, val []byte, priority int, r *genericReplacer) { 85 if len(key) == 0 { 86 if t.priority == 0 { 87 t.value = val 88 t.priority = priority 89 } 90 return 91 } 92 93 if len(t.prefix) > 0 { 94 // Need to split the prefix among multiple nodes. 95 var n int // length of the longest common prefix 96 for ; n < len(t.prefix) && n < len(key); n++ { 97 if t.prefix[n] != key[n] { 98 break 99 } 100 } 101 if n == len(t.prefix) { 102 t.next.add(key[n:], val, priority, r) 103 } else if n == 0 { 104 // First byte differs, start a new lookup table here. Looking up 105 // what is currently t.prefix[0] will lead to prefixNode, and 106 // looking up key[0] will lead to keyNode. 107 var prefixNode *trieNode 108 if len(t.prefix) == 1 { 109 prefixNode = t.next 110 } else { 111 prefixNode = &trieNode{ 112 prefix: t.prefix[1:], 113 next: t.next, 114 } 115 } 116 keyNode := new(trieNode) 117 t.table = make([]*trieNode, r.tableSize) 118 t.table[r.mapping[t.prefix[0]]] = prefixNode 119 t.table[r.mapping[key[0]]] = keyNode 120 t.prefix = nil 121 t.next = nil 122 keyNode.add(key[1:], val, priority, r) 123 } else { 124 // Insert new node after the common section of the prefix. 125 next := &trieNode{ 126 prefix: t.prefix[n:], 127 next: t.next, 128 } 129 t.prefix = t.prefix[:n] 130 t.next = next 131 next.add(key[n:], val, priority, r) 132 } 133 } else if t.table != nil { 134 // Insert into existing table. 135 m := r.mapping[key[0]] 136 if t.table[m] == nil { 137 t.table[m] = new(trieNode) 138 } 139 t.table[m].add(key[1:], val, priority, r) 140 } else { 141 t.prefix = key 142 t.next = new(trieNode) 143 t.next.add(nil, val, priority, r) 144 } 145} 146 147func (r *genericReplacer) lookup(s []byte, ignoreRoot bool) (val []byte, keylen int, found bool) { 148 // Iterate down the trie to the end, and grab the value and keylen with 149 // the highest priority. 150 bestPriority := 0 151 node := &r.root 152 n := 0 153 for node != nil { 154 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { 155 bestPriority = node.priority 156 val = node.value 157 keylen = n 158 found = true 159 } 160 161 if len(s) == 0 { 162 break 163 } 164 if node.table != nil { 165 index := r.mapping[s[0]] 166 if int(index) == r.tableSize { 167 break 168 } 169 node = node.table[index] 170 s = s[1:] 171 n++ 172 } else if len(node.prefix) > 0 && bytes.HasPrefix(s, node.prefix) { 173 n += len(node.prefix) 174 s = s[len(node.prefix):] 175 node = node.next 176 } else { 177 break 178 } 179 } 180 return 181} 182 183// genericReplacer is the fully generic algorithm. 184// It's used as a fallback when nothing faster can be used. 185type genericReplacer struct { 186 root trieNode 187 // tableSize is the size of a trie node's lookup table. It is the number 188 // of unique key bytes. 189 tableSize int 190 // mapping maps from key bytes to a dense index for trieNode.table. 191 mapping [256]byte 192} 193 194func makeGenericReplacer(oldnew []string) *genericReplacer { 195 r := new(genericReplacer) 196 // Find each byte used, then assign them each an index. 197 for i := 0; i < len(oldnew); i += 2 { 198 key := oldnew[i] 199 for j := 0; j < len(key); j++ { 200 r.mapping[key[j]] = 1 201 } 202 } 203 204 for _, b := range r.mapping { 205 r.tableSize += int(b) 206 } 207 208 var index byte 209 for i, b := range r.mapping { 210 if b == 0 { 211 r.mapping[i] = byte(r.tableSize) 212 } else { 213 r.mapping[i] = index 214 index++ 215 } 216 } 217 // Ensure root node uses a lookup table (for performance). 218 r.root.table = make([]*trieNode, r.tableSize) 219 220 for i := 0; i < len(oldnew); i += 2 { 221 r.root.add([]byte(oldnew[i]), []byte(oldnew[i+1]), len(oldnew)-i, r) 222 } 223 return r 224} 225 226func (r *genericReplacer) Replace(s []byte) []byte { 227 var last int 228 var prevMatchEmpty bool 229 dst := s[:0] 230 grown := false 231 for i := 0; i <= len(s); { 232 // Fast path: s[i] is not a prefix of any pattern. 233 if i != len(s) && r.root.priority == 0 { 234 index := int(r.mapping[s[i]]) 235 if index == r.tableSize || r.root.table[index] == nil { 236 i++ 237 continue 238 } 239 } 240 241 // Ignore the empty match iff the previous loop found the empty match. 242 val, keylen, match := r.lookup(s[i:], prevMatchEmpty) 243 prevMatchEmpty = match && keylen == 0 244 if match { 245 dst = append(dst, s[last:i]...) 246 if diff := len(val) - keylen; grown || diff < 0 { 247 dst = append(dst, val...) 248 i += keylen 249 } else if diff <= cap(s)-len(s) { 250 // The replacement is larger than the original, but can still fit in the original buffer. 251 copy(s[i+len(val):cap(dst)], s[i+keylen:]) 252 dst = append(dst, val...) 253 s = s[:len(s)+diff] 254 i += len(val) 255 } else { 256 // The output will grow larger than the original buffer. Allocate a new one. 257 grown = true 258 newDst := make([]byte, len(dst), cap(dst)+diff) 259 copy(newDst, dst) 260 dst = newDst 261 262 dst = append(dst, val...) 263 i += keylen 264 } 265 last = i 266 continue 267 } 268 i++ 269 } 270 if last != len(s) { 271 dst = append(dst, s[last:]...) 272 } 273 return dst 274} 275 276// byteReplacer is the implementation that's used when all the "old" 277// and "new" values are single ASCII bytes. 278// The array contains replacement bytes indexed by old byte. 279type byteReplacer [256]byte 280 281func (r *byteReplacer) Replace(s []byte) []byte { 282 for i, b := range s { 283 s[i] = r[b] 284 } 285 return s 286} 287