1// Copyright 2014 Richard Lehane. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package bytematcher builds a matching engine from a set of signatures and performs concurrent matching against an input siegreader.Buffer. 16package bytematcher 17 18import ( 19 "fmt" 20 "sync" 21 22 wac "github.com/richardlehane/match/fwac" 23 "github.com/richardlehane/siegfried/internal/bytematcher/frames" 24 "github.com/richardlehane/siegfried/internal/persist" 25 "github.com/richardlehane/siegfried/internal/priority" 26 "github.com/richardlehane/siegfried/internal/siegreader" 27 "github.com/richardlehane/siegfried/pkg/core" 28) 29 30// Matcher matches byte signatures against the siegreader.Buffer. 31type Matcher struct { 32 // the following fields are persisted 33 keyFrames [][]keyFrame 34 tests []*testTree 35 bofFrames *frameSet 36 eofFrames *frameSet 37 bofSeq *seqSet 38 eofSeq *seqSet 39 knownBOF int 40 knownEOF int 41 maxBOF int 42 maxEOF int 43 priorities *priority.Set 44 // remaining fields are not persisted 45 bmu *sync.Once 46 emu *sync.Once 47 bAho wac.Wac 48 eAho wac.Wac 49 lowmem bool 50} 51 52// SignatureSet for a bytematcher is a slice of frames.Signature. 53type SignatureSet []frames.Signature 54 55// Load loads a Matcher. 56func Load(ls *persist.LoadSaver) core.Matcher { 57 if !ls.LoadBool() { 58 return nil 59 } 60 return &Matcher{ 61 keyFrames: loadKeyFrames(ls), 62 tests: loadTests(ls), 63 bofFrames: loadFrameSet(ls), 64 eofFrames: loadFrameSet(ls), 65 bofSeq: loadSeqSet(ls), 66 eofSeq: loadSeqSet(ls), 67 knownBOF: ls.LoadInt(), 68 knownEOF: ls.LoadInt(), 69 maxBOF: ls.LoadInt(), 70 maxEOF: ls.LoadInt(), 71 priorities: priority.Load(ls), 72 bmu: &sync.Once{}, 73 emu: &sync.Once{}, 74 } 75} 76 77// Save persists a Matcher. 78func Save(c core.Matcher, ls *persist.LoadSaver) { 79 if c == nil { 80 ls.SaveBool(false) 81 return 82 } 83 b := c.(*Matcher) 84 ls.SaveBool(true) 85 saveKeyFrames(ls, b.keyFrames) 86 saveTests(ls, b.tests) 87 b.bofFrames.save(ls) 88 b.eofFrames.save(ls) 89 b.bofSeq.save(ls) 90 b.eofSeq.save(ls) 91 ls.SaveInt(b.knownBOF) 92 ls.SaveInt(b.knownEOF) 93 ls.SaveInt(b.maxBOF) 94 ls.SaveInt(b.maxEOF) 95 b.priorities.Save(ls) 96} 97 98type sigErrors []error 99 100func (se sigErrors) Error() string { 101 str := "bytematcher.Signatures errors:" 102 for _, v := range se { 103 str += v.Error() 104 str += "\n" 105 } 106 return str 107} 108 109// Add a set of signatures to a bytematcher. 110// The priority list should be of equal length to the signatures, or nil (if no priorities are to be set). 111// 112// Example: 113// m, n, err := Add(bm, []frames.Signature{frames.Signature{frames.NewFrame(frames.BOF, patterns.Sequence{'p','d','f'}, 0, 0)}}, nil) 114func Add(c core.Matcher, ss core.SignatureSet, priorities priority.List) (core.Matcher, int, error) { 115 var b *Matcher 116 if c == nil { 117 b = &Matcher{ 118 bofFrames: &frameSet{}, 119 eofFrames: &frameSet{}, 120 bofSeq: &seqSet{}, 121 eofSeq: &seqSet{}, 122 priorities: &priority.Set{}, 123 bmu: &sync.Once{}, 124 emu: &sync.Once{}, 125 } 126 } else { 127 b = c.(*Matcher) 128 } 129 sigs, ok := ss.(SignatureSet) 130 if !ok { 131 return nil, -1, fmt.Errorf("Byte matcher: can't convert signature set to BM signature set") 132 } 133 if len(sigs) == 0 { 134 return c, len(b.keyFrames), nil // return same matcher as given (may be nil) if no signatures to add 135 } 136 var se sigErrors 137 // process each of the sigs, adding them to b.Sigs and the various seq/frame/testTree sets 138 var bof, eof int 139 for _, sig := range sigs { 140 if err := b.addSignature(sig); err == nil { 141 // get the local max bof and eof by popping last keyframe and testing 142 kf := b.keyFrames[len(b.keyFrames)-1] 143 bof, eof = maxBOF(bof, kf), maxEOF(eof, kf) 144 } else { 145 se = append(se, err) 146 } 147 } 148 if len(se) > 0 { 149 return nil, -1, se 150 } 151 // set the maximum distances for this test tree so can properly size slices for matching 152 for _, t := range b.tests { 153 t.maxLeftDistance = maxLength(t.left) 154 t.maxRightDistance = maxLength(t.right) 155 } 156 // add the priorities to the priority set 157 b.priorities.Add(priorities, len(sigs), bof, eof) 158 return b, len(b.keyFrames), nil 159} 160 161// Identify matches a Matcher's signatures against the input siegreader.Buffer. 162// Results are passed on the returned channel. 163// 164// Example: 165// ret := bm.Identify("", buf) 166// for v := range ret { 167// if v.Index() == 0 { 168// fmt.Print("Success! It is signature 0!") 169// } 170// } 171func (b *Matcher) Identify(name string, sb *siegreader.Buffer, hints ...core.Hint) (chan core.Result, error) { 172 quit, ret := make(chan struct{}), make(chan core.Result) 173 go b.identify(sb, quit, ret, hints...) 174 return ret, nil 175} 176 177// String returns information about the Bytematcher including the number of BOF, VAR and EOF sequences, the number of BOF and EOF frames, and the total number of tests. 178func (b *Matcher) String() string { 179 str := fmt.Sprintf("BOF seqs: %v\n", len(b.bofSeq.set)) 180 str += fmt.Sprintf("EOF seqs: %v\n", len(b.eofSeq.set)) 181 str += fmt.Sprintf("BOF frames: %v\n", len(b.bofFrames.set)) 182 str += fmt.Sprintf("EOF frames: %v\n", len(b.eofFrames.set)) 183 str += fmt.Sprintf("Total Test Trees: %v\n", len(b.tests)) 184 var c, ic, l, r, ml, mr int 185 for _, t := range b.tests { 186 c += len(t.complete) 187 ic += len(t.incomplete) 188 l += len(t.left) 189 if ml < t.maxLeftDistance { 190 ml = t.maxLeftDistance 191 } 192 r += len(t.right) 193 if mr < t.maxRightDistance { 194 mr = t.maxRightDistance 195 } 196 } 197 str += fmt.Sprintf("Complete Tests: %v\n", c) 198 str += fmt.Sprintf("Incomplete Tests: %v\n", ic) 199 str += fmt.Sprintf("Left Tests: %v\n", l) 200 str += fmt.Sprintf("Right Tests: %v\n", r) 201 str += fmt.Sprintf("Maximum Left Distance: %v\n", ml) 202 str += fmt.Sprintf("Maximum Right Distance: %v\n", mr) 203 str += fmt.Sprintf("Known BOF distance: %v\n", b.knownBOF) 204 str += fmt.Sprintf("Known EOF distance: %v\n", b.knownEOF) 205 str += fmt.Sprintf("Maximum BOF Distance: %v\n", b.maxBOF) 206 str += fmt.Sprintf("Maximum EOF Distance: %v\n", b.maxEOF) 207 str += fmt.Sprintf("priorities: %v\n", b.priorities) 208 return str 209} 210 211// InspectTestTree reports which signatures are linked to a given index in the test tree. 212// This is used by the -log debug and -log slow options for sf. 213func (b *Matcher) InspectTestTree(i int) []int { 214 cres, ires, _, _, _, _ := b.DescribeTestTree(i) 215 return append(cres, ires...) 216} 217 218func (b *Matcher) DescribeTestTree(i int) ([]int, []int, int, int, int, int) { 219 if i < 0 || i >= len(b.tests) { 220 return nil, nil, 0, 0, 0, 0 221 } 222 t := b.tests[i] 223 cres := make([]int, len(t.complete)) 224 for i, v := range t.complete { 225 cres[i] = v[0] 226 } 227 ires := make([]int, len(t.incomplete)) 228 for i, v := range t.incomplete { 229 ires[i] = v.kf[0] 230 } 231 return cres, ires, t.maxLeftDistance, t.maxRightDistance, maxMatches(t.left, t.maxLeftDistance), maxMatches(t.right, t.maxRightDistance) 232} 233 234func (b *Matcher) TestTreeLen() int { 235 return len(b.tests) 236} 237 238func (b *Matcher) DescribeKeyFrames(i int) []string { 239 if i < 0 || i >= len(b.keyFrames) { 240 return nil 241 } 242 ret := make([]string, len(b.keyFrames[i])) 243 for j := range ret { 244 ret[j] = b.keyFrames[i][j].String() 245 } 246 return ret 247} 248 249func (b *Matcher) KeyFramesLen() int { 250 return len(b.keyFrames) 251} 252 253// SetLowMem instructs the Aho Corasick search tree to be built with a low memory opt (runs slightly slower than regular). 254func (b *Matcher) SetLowMem() { 255 b.lowmem = true 256} 257