// Package devicefilter contains an eBPF device filter program.
2//
3// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
4//
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
// agreed to relicense the file under the Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
7package devicefilter
8
9import (
10	"math"
11	"strconv"
12
13	"github.com/cilium/ebpf/asm"
14	devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
15	"github.com/opencontainers/runc/libcontainer/devices"
16	"github.com/pkg/errors"
17	"golang.org/x/sys/unix"
18)
19
// license is the license string returned alongside the generated eBPF
// program. Its format is the same as that of the kernel MODULE_LICENSE macro.
const license = "Apache"
24
25// DeviceFilter returns eBPF device filter program and its license string
26func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
27	// Generate the minimum ruleset for the device rules we are given. While we
28	// don't care about minimum transitions in cgroupv2, using the emulator
29	// gives us a guarantee that the behaviour of devices filtering is the same
30	// as cgroupv1, including security hardenings to avoid misconfiguration
31	// (such as punching holes in wildcard rules).
32	emu := new(devicesemulator.Emulator)
33	for _, rule := range rules {
34		if err := emu.Apply(*rule); err != nil {
35			return nil, "", err
36		}
37	}
38	cleanRules, err := emu.Rules()
39	if err != nil {
40		return nil, "", err
41	}
42
43	p := &program{
44		defaultAllow: emu.IsBlacklist(),
45	}
46	p.init()
47
48	for idx, rule := range cleanRules {
49		if rule.Type == devices.WildcardDevice {
50			// We can safely skip over wildcard entries because there should
51			// only be one (at most) at the very start to instruct cgroupv1 to
52			// go into allow-list mode. However we do double-check this here.
53			if idx != 0 || rule.Allow != emu.IsBlacklist() {
54				return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
55			}
56			continue
57		}
58		if rule.Allow == p.defaultAllow {
59			// There should be no rules which have an action equal to the
60			// default action, the emulator removes those.
61			return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
62		}
63		if err := p.appendRule(rule); err != nil {
64			return nil, "", err
65		}
66	}
67	insts, err := p.finalize()
68	return insts, license, err
69}
70
71type program struct {
72	insts        asm.Instructions
73	defaultAllow bool
74	blockID      int
75}
76
77func (p *program) init() {
78	// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
79	/*
80		u32 access_type
81		u32 major
82		u32 minor
83	*/
84	// R2 <- type (lower 16 bit of u32 access_type at R1[0])
85	p.insts = append(p.insts,
86		asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
87		asm.And.Imm32(asm.R2, 0xFFFF))
88
89	// R3 <- access (upper 16 bit of u32 access_type at R1[0])
90	p.insts = append(p.insts,
91		asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
92		// RSh: bitwise shift right
93		asm.RSh.Imm32(asm.R3, 16))
94
95	// R4 <- major (u32 major at R1[4])
96	p.insts = append(p.insts,
97		asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
98
99	// R5 <- minor (u32 minor at R1[8])
100	p.insts = append(p.insts,
101		asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
102}
103
104// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
105// to the in-progress filter program. In order to operate properly, it must be
106// called with a "clean" rule list (generated by devices.Emulator.Rules() --
107// with any "a" rules removed).
108func (p *program) appendRule(rule *devices.Rule) error {
109	if p.blockID < 0 {
110		return errors.New("the program is finalized")
111	}
112
113	var bpfType int32
114	switch rule.Type {
115	case devices.CharDevice:
116		bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
117	case devices.BlockDevice:
118		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
119	default:
120		// We do not permit 'a', nor any other types we don't know about.
121		return errors.Errorf("invalid type %q", string(rule.Type))
122	}
123	if rule.Major > math.MaxUint32 {
124		return errors.Errorf("invalid major %d", rule.Major)
125	}
126	if rule.Minor > math.MaxUint32 {
127		return errors.Errorf("invalid minor %d", rule.Major)
128	}
129	hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
130	hasMinor := rule.Minor >= 0
131	bpfAccess := int32(0)
132	for _, r := range rule.Permissions {
133		switch r {
134		case 'r':
135			bpfAccess |= unix.BPF_DEVCG_ACC_READ
136		case 'w':
137			bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
138		case 'm':
139			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
140		default:
141			return errors.Errorf("unknown device access %v", r)
142		}
143	}
144	// If the access is rwm, skip the check.
145	hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
146
147	var (
148		blockSym         = "block-" + strconv.Itoa(p.blockID)
149		nextBlockSym     = "block-" + strconv.Itoa(p.blockID+1)
150		prevBlockLastIdx = len(p.insts) - 1
151	)
152	p.insts = append(p.insts,
153		// if (R2 != bpfType) goto next
154		asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
155	)
156	if hasAccess {
157		p.insts = append(p.insts,
158			// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
159			asm.Mov.Reg32(asm.R1, asm.R3),
160			asm.And.Imm32(asm.R1, bpfAccess),
161			asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
162		)
163	}
164	if hasMajor {
165		p.insts = append(p.insts,
166			// if (R4 != major) goto next
167			asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
168		)
169	}
170	if hasMinor {
171		p.insts = append(p.insts,
172			// if (R5 != minor) goto next
173			asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
174		)
175	}
176	p.insts = append(p.insts, acceptBlock(rule.Allow)...)
177	// set blockSym to the first instruction we added in this iteration
178	p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
179	p.blockID++
180	return nil
181}
182
183func (p *program) finalize() (asm.Instructions, error) {
184	var v int32
185	if p.defaultAllow {
186		v = 1
187	}
188	blockSym := "block-" + strconv.Itoa(p.blockID)
189	p.insts = append(p.insts,
190		// R0 <- v
191		asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
192		asm.Return(),
193	)
194	p.blockID = -1
195	return p.insts, nil
196}
197
198func acceptBlock(accept bool) asm.Instructions {
199	var v int32
200	if accept {
201		v = 1
202	}
203	return []asm.Instruction{
204		// R0 <- v
205		asm.Mov.Imm32(asm.R0, v),
206		asm.Return(),
207	}
208}
209