1// Copyright 2014 Richard Lehane. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package priority creates a subordinate-superiors map of identifications. 16// These maps can be flattened into sorted lists for use by the bytematcher and containermatcher engines. 17// Multiple priority lists can be added to priority sets. These contain the priorities of different identifiers within a bytematcher or containermatcher. 18package priority 19 20import ( 21 "fmt" 22 "sort" 23 "sync" 24 25 "github.com/richardlehane/siegfried/internal/persist" 26 "github.com/richardlehane/siegfried/pkg/core" 27) 28 29// a priority map links subordinate results to a list of priority results 30type Map map[string][]string 31 32func (m Map) Difference(mb Map) Map { 33 mc := make(Map) 34 for k, v := range m { 35 vb, ok := mb[k] 36 if !ok { 37 mc[k] = v 38 continue 39 } 40 e := extras(v, vb) 41 if len(e) > 0 { 42 mc[k] = e 43 } 44 } 45 return mc 46} 47 48func (m Map) Elements() [][2]string { 49 fmts := make(map[string]bool) 50 elements := make([][2]string, 0, len(m)*3) 51 for k, v := range m { 52 for _, sup := range v { 53 elements = append(elements, [2]string{k, sup}) 54 fmts[sup] = true 55 } 56 } 57 for k, v := range m { 58 if len(v) == 0 && !fmts[k] { 59 elements = append(elements, [2]string{k, ""}) 60 } 61 } 62 return elements 63} 64 65func containsStr(ss []string, s string) bool { 66 for _, v := range ss { 67 if v == s { 68 return true 69 } 70 } 71 return false 72} 73 74func addStr(ss []string, s string) []string { 75 if containsStr(ss, s) { 76 return ss 77 } 78 return append(ss, s) 79} 80 81// add a subordinate-superior relationship to the priority map 82func (m Map) Add(subordinate string, superior string) { 83 if subordinate == "" || superior == "" { 84 return 85 } 86 _, ok := m[subordinate] 87 if ok { 88 m[subordinate] = addStr(m[subordinate], superior) 89 return 90 } 91 m[subordinate] = []string{superior} 92} 93 94// create a list of all strings that appear in 'a' but not 'b' 95func extras(a []string, b []string) []string { 96 ret := make([]string, 0) 97 for _, v := range a { 98 var exists bool 99 for _, v1 := range b { 100 if v == v1 { 101 exists = true 102 break 103 } 104 } 105 if !exists { 106 ret = append(ret, v) 107 } 108 } 109 return ret 110} 111 112func (m Map) priorityWalk(k string) []string { 113 tried := make([]string, 0) 114 ret := make([]string, 0) 115 var walkFn func(string) 116 walkFn = func(id string) { 117 vals, ok := m[id] 118 if !ok { 119 return 120 } 121 for _, v := range vals { 122 // avoid cycles 123 if containsStr(tried, v) { 124 continue 125 } 126 tried = append(tried, v) 127 priorityPriorities := m[v] 128 ret = append(ret, extras(priorityPriorities, vals)...) 129 walkFn(v) 130 } 131 } 132 walkFn(k) 133 return ret 134} 135 136// After adding all priorities, walk the priority map to make sure that it is consistent, 137// i.e. that for any format with a superior fmt, then anything superior 138// to that superior fmt is also marked as superior to the base fmt, all the way down the tree 139func (m Map) Complete() { 140 for k := range m { 141 extraPriorities := m.priorityWalk(k) 142 m[k] = append(m[k], extraPriorities...) 143 } 144} 145 146// because keys can be duplicated in the slice given to List(), the list of superior indexes may be larger than the list of superior keys 147func (m Map) expand(key string, iMap map[string][]int) []int { 148 // use an empty, rather than nil slice for ret. This means a priority.List will never contain a nil slice. 149 ret := make([]int, 0) 150 superiors := m[key] 151 for _, k := range superiors { 152 ret = append(ret, iMap[k]...) 153 } 154 sort.Ints(ret) 155 return ret 156} 157 158// Filter returns a new Priority Map that just contains formats in the provided slice 159func (m Map) Filter(fmts []string) Map { 160 ret := make(Map) 161 for _, v := range fmts { 162 l := m[v] 163 n := []string{} 164 for _, w := range l { 165 for _, x := range fmts { 166 if w == x { 167 n = append(n, w) 168 break 169 } 170 } 171 } 172 ret[v] = n 173 } 174 return ret 175} 176 177// return a priority list using the indexes from the supplied slice of keys (keys can be duplicated in that slice) 178func (m Map) List(keys []string) List { 179 if m == nil { 180 return nil 181 } 182 // build a map of keys to their indexes in the supplied slice 183 iMap := make(map[string][]int) 184 for _, k := range keys { 185 // continue on if the key has already been added 186 _, ok := iMap[k] 187 if ok { 188 continue 189 } 190 var indexes []int 191 for i, v := range keys { 192 if v == k { 193 indexes = append(indexes, i) 194 } 195 } 196 iMap[k] = indexes 197 } 198 l := make(List, len(keys)) 199 for i, k := range keys { 200 l[i] = m.expand(k, iMap) 201 } 202 return l 203} 204 205type List [][]int 206 207// take a list of indexes, subtract the length of the previous priority list in a set (or 0) to get relative indexes, 208// then map those against a priority list. Re-number according to indexes and return the common subset. 209func (l List) Subset(indexes []int, prev int) List { 210 if l == nil { 211 return nil 212 } 213 submap := make(map[int]int) 214 for i, v := range indexes { 215 submap[v-prev] = i 216 } 217 subset := make(List, len(indexes)) 218 for i, v := range indexes { 219 ns := make([]int, 0, len(l[v-prev])) 220 for _, w := range l[v-prev] { 221 if idx, ok := submap[w]; ok { 222 ns = append(ns, idx) 223 } 224 } 225 subset[i] = ns 226 } 227 return subset 228} 229 230func (l List) String() string { 231 if l == nil { 232 return "priority list: nil" 233 } 234 return fmt.Sprintf("priority list: %v", [][]int(l)) 235} 236 237// A priority set holds a number of priority lists 238type Set struct { 239 idx []int 240 lists []List 241 maxOffsets [][2]int 242} 243 244func (s *Set) Save(ls *persist.LoadSaver) { 245 ls.SaveInts(s.idx) 246 ls.SaveSmallInt(len(s.lists)) 247 for _, v := range s.lists { 248 ls.SaveSmallInt(len(v)) 249 for _, w := range v { 250 ls.SaveInts(w) 251 } 252 } 253 ls.SaveSmallInt(len(s.maxOffsets)) 254 for _, v := range s.maxOffsets { 255 ls.SaveInt(v[0]) 256 ls.SaveInt(v[1]) 257 } 258} 259 260func Load(ls *persist.LoadSaver) *Set { 261 set := &Set{} 262 set.idx = ls.LoadInts() 263 set.lists = make([]List, ls.LoadSmallInt()) 264 for i := range set.lists { 265 le := ls.LoadSmallInt() 266 if le == 0 { 267 continue 268 } 269 set.lists[i] = make(List, le) 270 for j := range set.lists[i] { 271 set.lists[i][j] = ls.LoadInts() 272 } 273 } 274 set.maxOffsets = make([][2]int, ls.LoadSmallInt()) 275 for i := range set.maxOffsets { 276 set.maxOffsets[i] = [2]int{ls.LoadInt(), ls.LoadInt()} 277 } 278 return set 279} 280 281// Add a priority list to a set. The length is the number of signatures the priority list applies to, not the length of the priority list. 282// This length will only differ when no priorities are set for a given set of signatures. 283func (s *Set) Add(l List, length, bof, eof int) { 284 var last int 285 if len(s.idx) > 0 { 286 last = s.idx[len(s.idx)-1] 287 } 288 s.idx = append(s.idx, length+last) 289 s.lists = append(s.lists, l) 290 s.maxOffsets = append(s.maxOffsets, [2]int{bof, eof}) 291} 292 293func (s *Set) list(i, j int) []int { 294 if s.lists[i] == nil { 295 return nil 296 } else { 297 l := s.lists[i][j] 298 if l == nil { 299 l = []int{} 300 } 301 return l 302 } 303} 304 305// at given BOF and EOF offsets, should we still wait on a given priority set? 306func (s *Set) await(idx int, bof, eof int64) bool { 307 if s.maxOffsets[idx][0] < 0 || (s.maxOffsets[idx][0] > 0 && int64(s.maxOffsets[idx][0]) >= bof) { 308 return true 309 } 310 if s.maxOffsets[idx][1] < 0 || (s.maxOffsets[idx][1] > 0 && int64(s.maxOffsets[idx][1]) >= eof) { 311 return true 312 } 313 return false 314} 315 316// Index return the index of the s.lists for the wait list, and return the previous tally 317// previous tally is necessary for adding to the values in the priority list to give real priorities 318func (s *Set) Index(i int) (int, int) { 319 var prev int 320 for idx, v := range s.idx { 321 if i < v { 322 return idx, prev 323 } 324 prev = v 325 } 326 // should never get here. Signal error 327 return -1, -1 328} 329 330// A wait set is a mutating structure that holds the set of indexes that should be waited for while matching underway 331type WaitSet struct { 332 *Set 333 wait [][]int // a nil list means we're not waiting on anything yet; an empty list means nothing to wait for i.e. satisifed 334 this []int // record last hit so can avoid pivotting to weaker matches 335 pivot [][]int // a pivot list is a list of indexes that we could potentially pivot to. E.g. for a .pdf file that has mp3 signatures, but is actually a PDF 336 m *sync.RWMutex 337} 338 339// WaitSet creates a new WaitSet given a list of hints 340func (s *Set) WaitSet(hints ...core.Hint) *WaitSet { 341 ws := &WaitSet{ 342 s, 343 make([][]int, len(s.lists)), 344 make([]int, len(s.lists)), 345 make([][]int, len(s.lists)), 346 &sync.RWMutex{}, 347 } 348 for _, h := range hints { 349 idx, _ := s.Index(h.Exclude) 350 if h.Pivot == nil { // if h.Pivot is nil (as opposed to empty slice), it is a signal that that matcher is satisfied 351 ws.wait[idx] = []int{} 352 } else { 353 ws.pivot[idx] = h.Pivot 354 } 355 } 356 return ws 357} 358 359// MaxOffsets returns max/min offset info in order to override the max/min offsets set on the bytematcher when 360// any identifiers have been excluded. 361func (w *WaitSet) MaxOffsets() (int, int) { 362 var bof, eof int 363 for i, v := range w.wait { 364 if v == nil { 365 if bof >= 0 && (w.maxOffsets[i][0] < 0 || bof < w.maxOffsets[i][0]) { 366 bof = w.maxOffsets[i][0] 367 } 368 if eof >= 0 && (w.maxOffsets[i][1] < 0 || eof < w.maxOffsets[i][1]) { 369 eof = w.maxOffsets[i][1] 370 } 371 } 372 } 373 return bof, eof 374} 375 376func inPivot(i int, ii []int) bool { 377 for _, v := range ii { 378 if i == v { 379 return true 380 } 381 } 382 return false 383} 384 385func mightPivot(i int, ii []int) bool { 386 return len(ii) > 0 && !inPivot(i, ii) 387} 388 389// Set the priority list & return a boolean indicating whether the WaitSet is satisfied such that matching can stop (i.e. no priority list is nil, and all are empty) 390func (w *WaitSet) Put(i int) bool { 391 idx, prev := w.Index(i) 392 l := w.list(idx, i-prev) 393 // no priorities for this set, return false immediately 394 if l == nil { 395 return false 396 } 397 w.m.Lock() 398 defer w.m.Unlock() 399 // set the wait list 400 w.wait[idx] = l 401 // set this 402 w.this[idx] = i - prev 403 mp := mightPivot(i, w.pivot[idx]) 404 if !mp { 405 w.pivot[idx] = nil // ditch the pivot list if it is just confirming a match or empty 406 } 407 // if we have any priorities, then we aren't satisified 408 if len(l) > 0 || mp { 409 return false 410 } 411 // if l is 0, and we have only one priority set, and we're not going to pivot, then we are satisfied 412 if len(w.wait) == 1 && !mp { 413 return true 414 } 415 // otherwise, let's check all the other priority sets for wait sets or pivot lists 416 for i, v := range w.wait { 417 if i == idx { 418 continue 419 } 420 if v == nil || len(v) > 0 || len(w.pivot[i]) > 0 { 421 return false 422 } 423 } 424 return true 425} 426 427// Set the priority list & return a boolean indicating whether the WaitSet is satisfied such that matching can stop (i.e. no priority list is nil, and all are empty) 428func (w *WaitSet) PutAt(i int, bof, eof int64) bool { 429 idx, prev := w.Index(i) 430 l := w.list(idx, i-prev) 431 // no priorities for this set, return false immediately 432 if l == nil && w.await(idx, bof, eof) { 433 return false 434 } 435 w.m.Lock() 436 defer w.m.Unlock() 437 // set the wait list 438 w.wait[idx] = l 439 // set this 440 w.this[idx] = i - prev 441 mp := mightPivot(i, w.pivot[idx]) 442 if !mp { 443 w.pivot[idx] = nil // ditch the pivot list if it is just confirming a match or empty 444 } 445 // if we have any priorities, then we aren't satisified 446 if (len(l) > 0 || mp) && w.await(idx, bof, eof) { 447 return false 448 } 449 // if l is 0, and we have only one priority set, and we're not going to pivot, then we are satisfied 450 if len(w.wait) == 1 && !mp { 451 return true 452 } 453 // otherwise, let's check all the other priority sets 454 for i, v := range w.wait { 455 if i == idx { 456 continue 457 } 458 if w.await(i, bof, eof) { 459 if v == nil || len(v) > 0 || len(w.pivot[i]) > 0 { 460 return false 461 } 462 } 463 } 464 return true 465} 466 467// Check a signature index against the appropriate priority list. Should we continue trying to match this signature? 468func (w *WaitSet) Check(i int) bool { 469 idx, prev := w.Index(i) 470 w.m.RLock() 471 defer w.m.RUnlock() 472 return w.check(i, idx, prev) 473} 474 475func (w *WaitSet) check(i, idx, prev int) bool { 476 if w.wait[idx] == nil { 477 return true 478 } 479 j := sort.SearchInts(w.wait[idx], i-prev) 480 if j == len(w.wait[idx]) || w.wait[idx][j] != i-prev { 481 if inPivot(i, w.pivot[idx]) { 482 l := w.list(idx, i-prev) 483 k := sort.SearchInts(l, w.this[idx]) 484 if k < len(l) && l[k] == w.this[idx] { 485 return false 486 } 487 return true 488 } 489 return false 490 } 491 return true 492} 493 494// Filter a waitset with a list of potential matches, return only those that we are still waiting on. Return nil if none. 495func (w *WaitSet) Filter(l []int) []int { 496 ret := make([]int, 0, len(l)) 497 w.m.RLock() 498 defer w.m.RUnlock() 499 for _, v := range l { 500 idx, prev := w.Index(v) 501 if w.check(v, idx, prev) { 502 ret = append(ret, v) 503 } 504 } 505 if len(ret) == 0 { 506 return nil 507 } 508 return ret 509} 510 511type Filterable interface { 512 Next() int 513 Mark(bool) 514} 515 516func (w *WaitSet) ApplyFilter(f Filterable) { 517 w.m.RLock() 518 defer w.m.RUnlock() 519 for i := f.Next(); i > -1; i = f.Next() { 520 idx, prev := w.Index(i) 521 f.Mark(w.check(i, idx, prev)) 522 } 523} 524 525// For periodic checking - what signatures are we currently waiting on? 526// Accumulates values from all the priority lists within the set. 527// Returns nil if *any* of the priority lists is nil. 528func (w *WaitSet) WaitingOn() []int { 529 w.m.RLock() 530 defer w.m.RUnlock() 531 var l int 532 for i, v := range w.wait { 533 if v == nil { 534 return nil 535 } 536 l = l + len(v) + len(w.pivot[i]) 537 } 538 ret := make([]int, l) 539 var prev, j int 540 for i, v := range w.wait { 541 for _, x := range v { 542 ret[j] = x + prev 543 j++ 544 } 545 copy(ret[j:], w.pivot[i]) 546 j += len(w.pivot[i]) 547 prev = w.idx[i] 548 } 549 return ret 550} 551 552// For periodic checking - what signatures are we currently waiting on, at the given offsets? 553// Accumulates values from all the priority lists within the set. 554// Returns nil if *any* of the priority lists is nil. 555func (w *WaitSet) WaitingOnAt(bof, eof int64) []int { 556 w.m.RLock() 557 defer w.m.RUnlock() 558 var l int 559 for i, v := range w.wait { 560 if w.await(i, bof, eof) { 561 if v == nil { 562 return nil 563 } 564 l = l + len(v) + len(w.pivot[i]) 565 } 566 } 567 ret := make([]int, l) 568 var prev, j int 569 for i, v := range w.wait { 570 if w.await(i, bof, eof) { 571 for _, x := range v { 572 ret[j] = x + prev 573 j++ 574 } 575 copy(ret[j:], w.pivot[i]) 576 j += len(w.pivot[i]) 577 } 578 prev = w.idx[i] 579 } 580 return ret 581} 582