1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package regexp
16
17import (
18	"fmt"
19	"regexp/syntax"
20)
21
22// ErrNoEmpty returned when "zero width assertions" are used
23var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed")
24
25// ErrNoWordBoundary returned when word boundaries are used
26var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed")
27
28// ErrNoBytes returned when byte literals are used
29var ErrNoBytes = fmt.Errorf("byte literals are not allowed")
30
31// ErrNoLazy returned when lazy quantifiers are used
32var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
33
34// ErrCompiledTooBig returned when regular expression parses into
35// too many instructions
36var ErrCompiledTooBig = fmt.Errorf("too many instructions")
37
38var DefaultLimit = uint(10 * (1 << 20))
39
40// Regexp implements the vellum.Automaton interface for matcing a user
41// specified regular expression.
42type Regexp struct {
43	orig string
44	dfa  *dfa
45}
46
47// NewRegexp creates a new Regular Expression automaton with the specified
48// expression.  By default it is limited to approximately 10MB for the
49// compiled finite state automaton.  If this size is exceeded,
50// ErrCompiledTooBig will be returned.
51func New(expr string) (*Regexp, error) {
52	return NewWithLimit(expr, DefaultLimit)
53}
54
55// NewRegexpWithLimit creates a new Regular Expression automaton with
56// the specified expression.  The size of the compiled finite state
57// automaton exceeds the user specified size,  ErrCompiledTooBig will be
58// returned.
59func NewWithLimit(expr string, size uint) (*Regexp, error) {
60	parsed, err := syntax.Parse(expr, syntax.Perl)
61	if err != nil {
62		return nil, err
63	}
64	return NewParsedWithLimit(expr, parsed, size)
65}
66
67func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
68	compiler := newCompiler(size)
69	insts, err := compiler.compile(parsed)
70	if err != nil {
71		return nil, err
72	}
73	dfaBuilder := newDfaBuilder(insts)
74	dfa, err := dfaBuilder.build()
75	if err != nil {
76		return nil, err
77	}
78	return &Regexp{
79		orig: expr,
80		dfa:  dfa,
81	}, nil
82}
83
84// Start returns the start state of this automaton.
85func (r *Regexp) Start() int {
86	return 1
87}
88
89// IsMatch returns if the specified state is a matching state.
90func (r *Regexp) IsMatch(s int) bool {
91	if s < len(r.dfa.states) {
92		return r.dfa.states[s].match
93	}
94	return false
95}
96
97// CanMatch returns if the specified state can ever transition to a matching
98// state.
99func (r *Regexp) CanMatch(s int) bool {
100	if s < len(r.dfa.states) && s > 0 {
101		return true
102	}
103	return false
104}
105
106// WillAlwaysMatch returns if the specified state will always end in a
107// matching state.
108func (r *Regexp) WillAlwaysMatch(int) bool {
109	return false
110}
111
112// Accept returns the new state, resulting from the transition byte b
113// when currently in the state s.
114func (r *Regexp) Accept(s int, b byte) int {
115	if s < len(r.dfa.states) {
116		return r.dfa.states[s].next[b]
117	}
118	return 0
119}
120