1// Copyright 2014 Richard Lehane. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package priority creates a subordinate-superiors map of identifications.
16// These maps can be flattened into sorted lists for use by the bytematcher and containermatcher engines.
17// Multiple priority lists can be added to priority sets. These contain the priorities of different identifiers within a bytematcher or containermatcher.
18package priority
19
20import (
21	"fmt"
22	"sort"
23	"sync"
24
25	"github.com/richardlehane/siegfried/internal/persist"
26	"github.com/richardlehane/siegfried/pkg/core"
27)
28
29// a priority map links subordinate results to a list of priority results
30type Map map[string][]string
31
32func (m Map) Difference(mb Map) Map {
33	mc := make(Map)
34	for k, v := range m {
35		vb, ok := mb[k]
36		if !ok {
37			mc[k] = v
38			continue
39		}
40		e := extras(v, vb)
41		if len(e) > 0 {
42			mc[k] = e
43		}
44	}
45	return mc
46}
47
48func (m Map) Elements() [][2]string {
49	fmts := make(map[string]bool)
50	elements := make([][2]string, 0, len(m)*3)
51	for k, v := range m {
52		for _, sup := range v {
53			elements = append(elements, [2]string{k, sup})
54			fmts[sup] = true
55		}
56	}
57	for k, v := range m {
58		if len(v) == 0 && !fmts[k] {
59			elements = append(elements, [2]string{k, ""})
60		}
61	}
62	return elements
63}
64
65func containsStr(ss []string, s string) bool {
66	for _, v := range ss {
67		if v == s {
68			return true
69		}
70	}
71	return false
72}
73
74func addStr(ss []string, s string) []string {
75	if containsStr(ss, s) {
76		return ss
77	}
78	return append(ss, s)
79}
80
81// add a subordinate-superior relationship to the priority map
82func (m Map) Add(subordinate string, superior string) {
83	if subordinate == "" || superior == "" {
84		return
85	}
86	_, ok := m[subordinate]
87	if ok {
88		m[subordinate] = addStr(m[subordinate], superior)
89		return
90	}
91	m[subordinate] = []string{superior}
92}
93
94// create a list of all strings that appear in 'a' but not 'b'
95func extras(a []string, b []string) []string {
96	ret := make([]string, 0)
97	for _, v := range a {
98		var exists bool
99		for _, v1 := range b {
100			if v == v1 {
101				exists = true
102				break
103			}
104		}
105		if !exists {
106			ret = append(ret, v)
107		}
108	}
109	return ret
110}
111
112func (m Map) priorityWalk(k string) []string {
113	tried := make([]string, 0)
114	ret := make([]string, 0)
115	var walkFn func(string)
116	walkFn = func(id string) {
117		vals, ok := m[id]
118		if !ok {
119			return
120		}
121		for _, v := range vals {
122			// avoid cycles
123			if containsStr(tried, v) {
124				continue
125			}
126			tried = append(tried, v)
127			priorityPriorities := m[v]
128			ret = append(ret, extras(priorityPriorities, vals)...)
129			walkFn(v)
130		}
131	}
132	walkFn(k)
133	return ret
134}
135
136// After adding all priorities, walk the priority map to make sure that it is consistent,
137// i.e. that for any format with a superior fmt, then anything superior
138// to that superior fmt is also marked as superior to the base fmt, all the way down the tree
139func (m Map) Complete() {
140	for k := range m {
141		extraPriorities := m.priorityWalk(k)
142		m[k] = append(m[k], extraPriorities...)
143	}
144}
145
146// because keys can be duplicated in the slice given to List(), the list of superior indexes may be larger than the list of superior keys
147func (m Map) expand(key string, iMap map[string][]int) []int {
148	// use an empty, rather than nil slice for ret. This means a priority.List will never contain a nil slice.
149	ret := make([]int, 0)
150	superiors := m[key]
151	for _, k := range superiors {
152		ret = append(ret, iMap[k]...)
153	}
154	sort.Ints(ret)
155	return ret
156}
157
158// Filter returns a new Priority Map that just contains formats in the provided slice
159func (m Map) Filter(fmts []string) Map {
160	ret := make(Map)
161	for _, v := range fmts {
162		l := m[v]
163		n := []string{}
164		for _, w := range l {
165			for _, x := range fmts {
166				if w == x {
167					n = append(n, w)
168					break
169				}
170			}
171		}
172		ret[v] = n
173	}
174	return ret
175}
176
177// return a priority list using the indexes from the supplied slice of keys (keys can be duplicated in that slice)
178func (m Map) List(keys []string) List {
179	if m == nil {
180		return nil
181	}
182	// build a map of keys to their indexes in the supplied slice
183	iMap := make(map[string][]int)
184	for _, k := range keys {
185		// continue on if the key has already been added
186		_, ok := iMap[k]
187		if ok {
188			continue
189		}
190		var indexes []int
191		for i, v := range keys {
192			if v == k {
193				indexes = append(indexes, i)
194			}
195		}
196		iMap[k] = indexes
197	}
198	l := make(List, len(keys))
199	for i, k := range keys {
200		l[i] = m.expand(k, iMap)
201	}
202	return l
203}
204
205type List [][]int
206
207// take a list of indexes, subtract the length of the previous priority list in a set (or 0) to get relative indexes,
208// then map those against a priority list. Re-number according to indexes and return the common subset.
209func (l List) Subset(indexes []int, prev int) List {
210	if l == nil {
211		return nil
212	}
213	submap := make(map[int]int)
214	for i, v := range indexes {
215		submap[v-prev] = i
216	}
217	subset := make(List, len(indexes))
218	for i, v := range indexes {
219		ns := make([]int, 0, len(l[v-prev]))
220		for _, w := range l[v-prev] {
221			if idx, ok := submap[w]; ok {
222				ns = append(ns, idx)
223			}
224		}
225		subset[i] = ns
226	}
227	return subset
228}
229
230func (l List) String() string {
231	if l == nil {
232		return "priority list: nil"
233	}
234	return fmt.Sprintf("priority list: %v", [][]int(l))
235}
236
237// A priority set holds a number of priority lists
238type Set struct {
239	idx        []int
240	lists      []List
241	maxOffsets [][2]int
242}
243
244func (s *Set) Save(ls *persist.LoadSaver) {
245	ls.SaveInts(s.idx)
246	ls.SaveSmallInt(len(s.lists))
247	for _, v := range s.lists {
248		ls.SaveSmallInt(len(v))
249		for _, w := range v {
250			ls.SaveInts(w)
251		}
252	}
253	ls.SaveSmallInt(len(s.maxOffsets))
254	for _, v := range s.maxOffsets {
255		ls.SaveInt(v[0])
256		ls.SaveInt(v[1])
257	}
258}
259
260func Load(ls *persist.LoadSaver) *Set {
261	set := &Set{}
262	set.idx = ls.LoadInts()
263	set.lists = make([]List, ls.LoadSmallInt())
264	for i := range set.lists {
265		le := ls.LoadSmallInt()
266		if le == 0 {
267			continue
268		}
269		set.lists[i] = make(List, le)
270		for j := range set.lists[i] {
271			set.lists[i][j] = ls.LoadInts()
272		}
273	}
274	set.maxOffsets = make([][2]int, ls.LoadSmallInt())
275	for i := range set.maxOffsets {
276		set.maxOffsets[i] = [2]int{ls.LoadInt(), ls.LoadInt()}
277	}
278	return set
279}
280
281// Add a priority list to a set. The length is the number of signatures the priority list applies to, not the length of the priority list.
282// This length will only differ when no priorities are set for a given set of signatures.
283func (s *Set) Add(l List, length, bof, eof int) {
284	var last int
285	if len(s.idx) > 0 {
286		last = s.idx[len(s.idx)-1]
287	}
288	s.idx = append(s.idx, length+last)
289	s.lists = append(s.lists, l)
290	s.maxOffsets = append(s.maxOffsets, [2]int{bof, eof})
291}
292
293func (s *Set) list(i, j int) []int {
294	if s.lists[i] == nil {
295		return nil
296	} else {
297		l := s.lists[i][j]
298		if l == nil {
299			l = []int{}
300		}
301		return l
302	}
303}
304
305// at given BOF and EOF offsets, should we still wait on a given priority set?
306func (s *Set) await(idx int, bof, eof int64) bool {
307	if s.maxOffsets[idx][0] < 0 || (s.maxOffsets[idx][0] > 0 && int64(s.maxOffsets[idx][0]) >= bof) {
308		return true
309	}
310	if s.maxOffsets[idx][1] < 0 || (s.maxOffsets[idx][1] > 0 && int64(s.maxOffsets[idx][1]) >= eof) {
311		return true
312	}
313	return false
314}
315
316// Index return the index of the s.lists for the wait list, and return the previous tally
317// previous tally is necessary for adding to the values in the priority list to give real priorities
318func (s *Set) Index(i int) (int, int) {
319	var prev int
320	for idx, v := range s.idx {
321		if i < v {
322			return idx, prev
323		}
324		prev = v
325	}
326	// should never get here. Signal error
327	return -1, -1
328}
329
330// A wait set is a mutating structure that holds the set of indexes that should be waited for while matching underway
331type WaitSet struct {
332	*Set
333	wait  [][]int // a nil list means we're not waiting on anything yet; an empty list means nothing to wait for i.e. satisifed
334	this  []int   // record last hit so can avoid pivotting to weaker matches
335	pivot [][]int // a pivot list is a list of indexes that we could potentially pivot to. E.g. for a .pdf file that has mp3 signatures, but is actually a PDF
336	m     *sync.RWMutex
337}
338
339// WaitSet creates a new WaitSet given a list of hints
340func (s *Set) WaitSet(hints ...core.Hint) *WaitSet {
341	ws := &WaitSet{
342		s,
343		make([][]int, len(s.lists)),
344		make([]int, len(s.lists)),
345		make([][]int, len(s.lists)),
346		&sync.RWMutex{},
347	}
348	for _, h := range hints {
349		idx, _ := s.Index(h.Exclude)
350		if h.Pivot == nil { // if h.Pivot is nil (as opposed to empty slice), it is a signal that that matcher is satisfied
351			ws.wait[idx] = []int{}
352		} else {
353			ws.pivot[idx] = h.Pivot
354		}
355	}
356	return ws
357}
358
359// MaxOffsets returns max/min offset info in order to override the max/min offsets set on the bytematcher when
360// any identifiers have been excluded.
361func (w *WaitSet) MaxOffsets() (int, int) {
362	var bof, eof int
363	for i, v := range w.wait {
364		if v == nil {
365			if bof >= 0 && (w.maxOffsets[i][0] < 0 || bof < w.maxOffsets[i][0]) {
366				bof = w.maxOffsets[i][0]
367			}
368			if eof >= 0 && (w.maxOffsets[i][1] < 0 || eof < w.maxOffsets[i][1]) {
369				eof = w.maxOffsets[i][1]
370			}
371		}
372	}
373	return bof, eof
374}
375
376func inPivot(i int, ii []int) bool {
377	for _, v := range ii {
378		if i == v {
379			return true
380		}
381	}
382	return false
383}
384
385func mightPivot(i int, ii []int) bool {
386	return len(ii) > 0 && !inPivot(i, ii)
387}
388
389// Set the priority list & return a boolean indicating whether the WaitSet is satisfied such that matching can stop (i.e. no priority list is nil, and all are empty)
390func (w *WaitSet) Put(i int) bool {
391	idx, prev := w.Index(i)
392	l := w.list(idx, i-prev)
393	// no priorities for this set, return false immediately
394	if l == nil {
395		return false
396	}
397	w.m.Lock()
398	defer w.m.Unlock()
399	// set the wait list
400	w.wait[idx] = l
401	// set this
402	w.this[idx] = i - prev
403	mp := mightPivot(i, w.pivot[idx])
404	if !mp {
405		w.pivot[idx] = nil // ditch the pivot list if it is just confirming a match or empty
406	}
407	// if we have any priorities, then we aren't satisified
408	if len(l) > 0 || mp {
409		return false
410	}
411	// if l is 0, and we have only one priority set, and we're not going to pivot, then we are satisfied
412	if len(w.wait) == 1 && !mp {
413		return true
414	}
415	// otherwise, let's check all the other priority sets for wait sets or pivot lists
416	for i, v := range w.wait {
417		if i == idx {
418			continue
419		}
420		if v == nil || len(v) > 0 || len(w.pivot[i]) > 0 {
421			return false
422		}
423	}
424	return true
425}
426
427// Set the priority list & return a boolean indicating whether the WaitSet is satisfied such that matching can stop (i.e. no priority list is nil, and all are empty)
428func (w *WaitSet) PutAt(i int, bof, eof int64) bool {
429	idx, prev := w.Index(i)
430	l := w.list(idx, i-prev)
431	// no priorities for this set, return false immediately
432	if l == nil && w.await(idx, bof, eof) {
433		return false
434	}
435	w.m.Lock()
436	defer w.m.Unlock()
437	// set the wait list
438	w.wait[idx] = l
439	// set this
440	w.this[idx] = i - prev
441	mp := mightPivot(i, w.pivot[idx])
442	if !mp {
443		w.pivot[idx] = nil // ditch the pivot list if it is just confirming a match or empty
444	}
445	// if we have any priorities, then we aren't satisified
446	if (len(l) > 0 || mp) && w.await(idx, bof, eof) {
447		return false
448	}
449	// if l is 0, and we have only one priority set, and we're not going to pivot, then we are satisfied
450	if len(w.wait) == 1 && !mp {
451		return true
452	}
453	// otherwise, let's check all the other priority sets
454	for i, v := range w.wait {
455		if i == idx {
456			continue
457		}
458		if w.await(i, bof, eof) {
459			if v == nil || len(v) > 0 || len(w.pivot[i]) > 0 {
460				return false
461			}
462		}
463	}
464	return true
465}
466
467// Check a signature index against the appropriate priority list. Should we continue trying to match this signature?
468func (w *WaitSet) Check(i int) bool {
469	idx, prev := w.Index(i)
470	w.m.RLock()
471	defer w.m.RUnlock()
472	return w.check(i, idx, prev)
473}
474
475func (w *WaitSet) check(i, idx, prev int) bool {
476	if w.wait[idx] == nil {
477		return true
478	}
479	j := sort.SearchInts(w.wait[idx], i-prev)
480	if j == len(w.wait[idx]) || w.wait[idx][j] != i-prev {
481		if inPivot(i, w.pivot[idx]) {
482			l := w.list(idx, i-prev)
483			k := sort.SearchInts(l, w.this[idx])
484			if k < len(l) && l[k] == w.this[idx] {
485				return false
486			}
487			return true
488		}
489		return false
490	}
491	return true
492}
493
494// Filter a waitset with a list of potential matches, return only those that we are still waiting on. Return nil if none.
495func (w *WaitSet) Filter(l []int) []int {
496	ret := make([]int, 0, len(l))
497	w.m.RLock()
498	defer w.m.RUnlock()
499	for _, v := range l {
500		idx, prev := w.Index(v)
501		if w.check(v, idx, prev) {
502			ret = append(ret, v)
503		}
504	}
505	if len(ret) == 0 {
506		return nil
507	}
508	return ret
509}
510
511type Filterable interface {
512	Next() int
513	Mark(bool)
514}
515
516func (w *WaitSet) ApplyFilter(f Filterable) {
517	w.m.RLock()
518	defer w.m.RUnlock()
519	for i := f.Next(); i > -1; i = f.Next() {
520		idx, prev := w.Index(i)
521		f.Mark(w.check(i, idx, prev))
522	}
523}
524
525// For periodic checking - what signatures are we currently waiting on?
526// Accumulates values from all the priority lists within the set.
527// Returns nil if *any* of the priority lists is nil.
528func (w *WaitSet) WaitingOn() []int {
529	w.m.RLock()
530	defer w.m.RUnlock()
531	var l int
532	for i, v := range w.wait {
533		if v == nil {
534			return nil
535		}
536		l = l + len(v) + len(w.pivot[i])
537	}
538	ret := make([]int, l)
539	var prev, j int
540	for i, v := range w.wait {
541		for _, x := range v {
542			ret[j] = x + prev
543			j++
544		}
545		copy(ret[j:], w.pivot[i])
546		j += len(w.pivot[i])
547		prev = w.idx[i]
548	}
549	return ret
550}
551
552// For periodic checking - what signatures are we currently waiting on, at the given offsets?
553// Accumulates values from all the priority lists within the set.
554// Returns nil if *any* of the priority lists is nil.
555func (w *WaitSet) WaitingOnAt(bof, eof int64) []int {
556	w.m.RLock()
557	defer w.m.RUnlock()
558	var l int
559	for i, v := range w.wait {
560		if w.await(i, bof, eof) {
561			if v == nil {
562				return nil
563			}
564			l = l + len(v) + len(w.pivot[i])
565		}
566	}
567	ret := make([]int, l)
568	var prev, j int
569	for i, v := range w.wait {
570		if w.await(i, bof, eof) {
571			for _, x := range v {
572				ret[j] = x + prev
573				j++
574			}
575			copy(ret[j:], w.pivot[i])
576			j += len(w.pivot[i])
577		}
578		prev = w.idx[i]
579	}
580	return ret
581}
582