1// Copyright 2014 Richard Lehane. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package bytematcher builds a matching engine from a set of signatures and performs concurrent matching against an input siegreader.Buffer.
16package bytematcher
17
18import (
19	"fmt"
20	"sync"
21
22	wac "github.com/richardlehane/match/fwac"
23	"github.com/richardlehane/siegfried/internal/bytematcher/frames"
24	"github.com/richardlehane/siegfried/internal/persist"
25	"github.com/richardlehane/siegfried/internal/priority"
26	"github.com/richardlehane/siegfried/internal/siegreader"
27	"github.com/richardlehane/siegfried/pkg/core"
28)
29
30// Matcher matches byte signatures against the siegreader.Buffer.
31type Matcher struct {
32	// the following fields are persisted
33	keyFrames  [][]keyFrame
34	tests      []*testTree
35	bofFrames  *frameSet
36	eofFrames  *frameSet
37	bofSeq     *seqSet
38	eofSeq     *seqSet
39	knownBOF   int
40	knownEOF   int
41	maxBOF     int
42	maxEOF     int
43	priorities *priority.Set
44	// remaining fields are not persisted
45	bmu    *sync.Once
46	emu    *sync.Once
47	bAho   wac.Wac
48	eAho   wac.Wac
49	lowmem bool
50}
51
52// SignatureSet for a bytematcher is a slice of frames.Signature.
53type SignatureSet []frames.Signature
54
55// Load loads a Matcher.
56func Load(ls *persist.LoadSaver) core.Matcher {
57	if !ls.LoadBool() {
58		return nil
59	}
60	return &Matcher{
61		keyFrames:  loadKeyFrames(ls),
62		tests:      loadTests(ls),
63		bofFrames:  loadFrameSet(ls),
64		eofFrames:  loadFrameSet(ls),
65		bofSeq:     loadSeqSet(ls),
66		eofSeq:     loadSeqSet(ls),
67		knownBOF:   ls.LoadInt(),
68		knownEOF:   ls.LoadInt(),
69		maxBOF:     ls.LoadInt(),
70		maxEOF:     ls.LoadInt(),
71		priorities: priority.Load(ls),
72		bmu:        &sync.Once{},
73		emu:        &sync.Once{},
74	}
75}
76
77// Save persists a Matcher.
78func Save(c core.Matcher, ls *persist.LoadSaver) {
79	if c == nil {
80		ls.SaveBool(false)
81		return
82	}
83	b := c.(*Matcher)
84	ls.SaveBool(true)
85	saveKeyFrames(ls, b.keyFrames)
86	saveTests(ls, b.tests)
87	b.bofFrames.save(ls)
88	b.eofFrames.save(ls)
89	b.bofSeq.save(ls)
90	b.eofSeq.save(ls)
91	ls.SaveInt(b.knownBOF)
92	ls.SaveInt(b.knownEOF)
93	ls.SaveInt(b.maxBOF)
94	ls.SaveInt(b.maxEOF)
95	b.priorities.Save(ls)
96}
97
98type sigErrors []error
99
100func (se sigErrors) Error() string {
101	str := "bytematcher.Signatures errors:"
102	for _, v := range se {
103		str += v.Error()
104		str += "\n"
105	}
106	return str
107}
108
109// Add a set of signatures to a bytematcher.
110// The priority list should be of equal length to the signatures, or nil (if no priorities are to be set).
111//
112// Example:
113//   m, n, err := Add(bm, []frames.Signature{frames.Signature{frames.NewFrame(frames.BOF, patterns.Sequence{'p','d','f'}, 0, 0)}}, nil)
114func Add(c core.Matcher, ss core.SignatureSet, priorities priority.List) (core.Matcher, int, error) {
115	var b *Matcher
116	if c == nil {
117		b = &Matcher{
118			bofFrames:  &frameSet{},
119			eofFrames:  &frameSet{},
120			bofSeq:     &seqSet{},
121			eofSeq:     &seqSet{},
122			priorities: &priority.Set{},
123			bmu:        &sync.Once{},
124			emu:        &sync.Once{},
125		}
126	} else {
127		b = c.(*Matcher)
128	}
129	sigs, ok := ss.(SignatureSet)
130	if !ok {
131		return nil, -1, fmt.Errorf("Byte matcher: can't convert signature set to BM signature set")
132	}
133	if len(sigs) == 0 {
134		return c, len(b.keyFrames), nil // return same matcher as given (may be nil) if no signatures to add
135	}
136	var se sigErrors
137	// process each of the sigs, adding them to b.Sigs and the various seq/frame/testTree sets
138	var bof, eof int
139	for _, sig := range sigs {
140		if err := b.addSignature(sig); err == nil {
141			// get the local max bof and eof by popping last keyframe and testing
142			kf := b.keyFrames[len(b.keyFrames)-1]
143			bof, eof = maxBOF(bof, kf), maxEOF(eof, kf)
144		} else {
145			se = append(se, err)
146		}
147	}
148	if len(se) > 0 {
149		return nil, -1, se
150	}
151	// set the maximum distances for this test tree so can properly size slices for matching
152	for _, t := range b.tests {
153		t.maxLeftDistance = maxLength(t.left)
154		t.maxRightDistance = maxLength(t.right)
155	}
156	// add the priorities to the priority set
157	b.priorities.Add(priorities, len(sigs), bof, eof)
158	return b, len(b.keyFrames), nil
159}
160
161// Identify matches a Matcher's signatures against the input siegreader.Buffer.
162// Results are passed on the returned channel.
163//
164// Example:
165//   ret := bm.Identify("", buf)
166//   for v := range ret {
167//     if v.Index() == 0 {
168//       fmt.Print("Success! It is signature 0!")
169//     }
170//   }
171func (b *Matcher) Identify(name string, sb *siegreader.Buffer, hints ...core.Hint) (chan core.Result, error) {
172	quit, ret := make(chan struct{}), make(chan core.Result)
173	go b.identify(sb, quit, ret, hints...)
174	return ret, nil
175}
176
177// String returns information about the Bytematcher including the number of BOF, VAR and EOF sequences, the number of BOF and EOF frames, and the total number of tests.
178func (b *Matcher) String() string {
179	str := fmt.Sprintf("BOF seqs: %v\n", len(b.bofSeq.set))
180	str += fmt.Sprintf("EOF seqs: %v\n", len(b.eofSeq.set))
181	str += fmt.Sprintf("BOF frames: %v\n", len(b.bofFrames.set))
182	str += fmt.Sprintf("EOF frames: %v\n", len(b.eofFrames.set))
183	str += fmt.Sprintf("Total Test Trees: %v\n", len(b.tests))
184	var c, ic, l, r, ml, mr int
185	for _, t := range b.tests {
186		c += len(t.complete)
187		ic += len(t.incomplete)
188		l += len(t.left)
189		if ml < t.maxLeftDistance {
190			ml = t.maxLeftDistance
191		}
192		r += len(t.right)
193		if mr < t.maxRightDistance {
194			mr = t.maxRightDistance
195		}
196	}
197	str += fmt.Sprintf("Complete Tests: %v\n", c)
198	str += fmt.Sprintf("Incomplete Tests: %v\n", ic)
199	str += fmt.Sprintf("Left Tests: %v\n", l)
200	str += fmt.Sprintf("Right Tests: %v\n", r)
201	str += fmt.Sprintf("Maximum Left Distance: %v\n", ml)
202	str += fmt.Sprintf("Maximum Right Distance: %v\n", mr)
203	str += fmt.Sprintf("Known BOF distance: %v\n", b.knownBOF)
204	str += fmt.Sprintf("Known EOF distance: %v\n", b.knownEOF)
205	str += fmt.Sprintf("Maximum BOF Distance: %v\n", b.maxBOF)
206	str += fmt.Sprintf("Maximum EOF Distance: %v\n", b.maxEOF)
207	str += fmt.Sprintf("priorities: %v\n", b.priorities)
208	return str
209}
210
211// InspectTestTree reports which signatures are linked to a given index in the test tree.
212// This is used by the -log debug and -log slow options for sf.
213func (b *Matcher) InspectTestTree(i int) []int {
214	cres, ires, _, _, _, _ := b.DescribeTestTree(i)
215	return append(cres, ires...)
216}
217
218func (b *Matcher) DescribeTestTree(i int) ([]int, []int, int, int, int, int) {
219	if i < 0 || i >= len(b.tests) {
220		return nil, nil, 0, 0, 0, 0
221	}
222	t := b.tests[i]
223	cres := make([]int, len(t.complete))
224	for i, v := range t.complete {
225		cres[i] = v[0]
226	}
227	ires := make([]int, len(t.incomplete))
228	for i, v := range t.incomplete {
229		ires[i] = v.kf[0]
230	}
231	return cres, ires, t.maxLeftDistance, t.maxRightDistance, maxMatches(t.left, t.maxLeftDistance), maxMatches(t.right, t.maxRightDistance)
232}
233
234func (b *Matcher) TestTreeLen() int {
235	return len(b.tests)
236}
237
238func (b *Matcher) DescribeKeyFrames(i int) []string {
239	if i < 0 || i >= len(b.keyFrames) {
240		return nil
241	}
242	ret := make([]string, len(b.keyFrames[i]))
243	for j := range ret {
244		ret[j] = b.keyFrames[i][j].String()
245	}
246	return ret
247}
248
249func (b *Matcher) KeyFramesLen() int {
250	return len(b.keyFrames)
251}
252
253// SetLowMem instructs the Aho Corasick search tree to be built with a low memory opt (runs slightly slower than regular).
254func (b *Matcher) SetLowMem() {
255	b.lowmem = true
256}
257