1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package regexp 16 17import ( 18 "fmt" 19 "regexp/syntax" 20) 21 22// ErrNoEmpty returned when "zero width assertions" are used 23var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed") 24 25// ErrNoWordBoundary returned when word boundaries are used 26var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed") 27 28// ErrNoBytes returned when byte literals are used 29var ErrNoBytes = fmt.Errorf("byte literals are not allowed") 30 31// ErrNoLazy returned when lazy quantifiers are used 32var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed") 33 34// ErrCompiledTooBig returned when regular expression parses into 35// too many instructions 36var ErrCompiledTooBig = fmt.Errorf("too many instructions") 37 38var DefaultLimit = uint(10 * (1 << 20)) 39 40// Regexp implements the vellum.Automaton interface for matcing a user 41// specified regular expression. 42type Regexp struct { 43 orig string 44 dfa *dfa 45} 46 47// NewRegexp creates a new Regular Expression automaton with the specified 48// expression. By default it is limited to approximately 10MB for the 49// compiled finite state automaton. If this size is exceeded, 50// ErrCompiledTooBig will be returned. 51func New(expr string) (*Regexp, error) { 52 return NewWithLimit(expr, DefaultLimit) 53} 54 55// NewRegexpWithLimit creates a new Regular Expression automaton with 56// the specified expression. The size of the compiled finite state 57// automaton exceeds the user specified size, ErrCompiledTooBig will be 58// returned. 59func NewWithLimit(expr string, size uint) (*Regexp, error) { 60 parsed, err := syntax.Parse(expr, syntax.Perl) 61 if err != nil { 62 return nil, err 63 } 64 return NewParsedWithLimit(expr, parsed, size) 65} 66 67func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) { 68 compiler := newCompiler(size) 69 insts, err := compiler.compile(parsed) 70 if err != nil { 71 return nil, err 72 } 73 dfaBuilder := newDfaBuilder(insts) 74 dfa, err := dfaBuilder.build() 75 if err != nil { 76 return nil, err 77 } 78 return &Regexp{ 79 orig: expr, 80 dfa: dfa, 81 }, nil 82} 83 84// Start returns the start state of this automaton. 85func (r *Regexp) Start() int { 86 return 1 87} 88 89// IsMatch returns if the specified state is a matching state. 90func (r *Regexp) IsMatch(s int) bool { 91 if s < len(r.dfa.states) { 92 return r.dfa.states[s].match 93 } 94 return false 95} 96 97// CanMatch returns if the specified state can ever transition to a matching 98// state. 99func (r *Regexp) CanMatch(s int) bool { 100 if s < len(r.dfa.states) && s > 0 { 101 return true 102 } 103 return false 104} 105 106// WillAlwaysMatch returns if the specified state will always end in a 107// matching state. 108func (r *Regexp) WillAlwaysMatch(int) bool { 109 return false 110} 111 112// Accept returns the new state, resulting from the transition byte b 113// when currently in the state s. 114func (r *Regexp) Accept(s int, b byte) int { 115 if s < len(r.dfa.states) { 116 return r.dfa.states[s].next[b] 117 } 118 return 0 119} 120