1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package levenshtein 16 17import ( 18 "fmt" 19) 20 21// StateLimit is the maximum number of states allowed 22const StateLimit = 10000 23 24// ErrTooManyStates is returned if you attempt to build a Levenshtein 25// automaton which requires too many states. 26var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states", StateLimit) 27 28// Levenshtein implements the vellum.Automaton interface for matching 29// terms within the specified Levenshtein edit-distance of the queried 30// term. This automaton recognizes utf-8 encoded bytes and computes 31// the edit distance on the result code-points, not on the raw bytes. 32type Levenshtein struct { 33 prog *dynamicLevenshtein 34 dfa *dfa 35} 36 37// New creates a new Levenshtein automaton for the specified 38// query string and edit distance. 39func New(query string, distance int) (*Levenshtein, error) { 40 lev := &dynamicLevenshtein{ 41 query: query, 42 distance: uint(distance), 43 } 44 dfabuilder := newDfaBuilder(lev) 45 dfa, err := dfabuilder.build() 46 if err != nil { 47 return nil, err 48 } 49 return &Levenshtein{ 50 prog: lev, 51 dfa: dfa, 52 }, nil 53} 54 55// Start returns the start state of this automaton. 56func (l *Levenshtein) Start() int { 57 return 1 58} 59 60// IsMatch returns if the specified state is a matching state. 61func (l *Levenshtein) IsMatch(s int) bool { 62 if s < len(l.dfa.states) { 63 return l.dfa.states[s].match 64 } 65 return false 66} 67 68// CanMatch returns if the specified state can ever transition to a matching 69// state. 70func (l *Levenshtein) CanMatch(s int) bool { 71 if s < len(l.dfa.states) && s > 0 { 72 return true 73 } 74 return false 75} 76 77// WillAlwaysMatch returns if the specified state will always end in a 78// matching state. 79func (l *Levenshtein) WillAlwaysMatch(s int) bool { 80 return false 81} 82 83// Accept returns the new state, resulting from the transite byte b 84// when currently in the state s. 85func (l *Levenshtein) Accept(s int, b byte) int { 86 if s < len(l.dfa.states) { 87 return l.dfa.states[s].next[b] 88 } 89 return 0 90} 91