1// Package devicefilter contains eBPF device filter program 2// 3// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c 4// 5// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) 6// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 7package devicefilter 8 9import ( 10 "math" 11 "strconv" 12 13 "github.com/cilium/ebpf/asm" 14 devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices" 15 "github.com/opencontainers/runc/libcontainer/devices" 16 "github.com/pkg/errors" 17 "golang.org/x/sys/unix" 18) 19 20const ( 21 // license string format is same as kernel MODULE_LICENSE macro 22 license = "Apache" 23) 24 25// DeviceFilter returns eBPF device filter program and its license string 26func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { 27 // Generate the minimum ruleset for the device rules we are given. While we 28 // don't care about minimum transitions in cgroupv2, using the emulator 29 // gives us a guarantee that the behaviour of devices filtering is the same 30 // as cgroupv1, including security hardenings to avoid misconfiguration 31 // (such as punching holes in wildcard rules). 32 emu := new(devicesemulator.Emulator) 33 for _, rule := range rules { 34 if err := emu.Apply(*rule); err != nil { 35 return nil, "", err 36 } 37 } 38 cleanRules, err := emu.Rules() 39 if err != nil { 40 return nil, "", err 41 } 42 43 p := &program{ 44 defaultAllow: emu.IsBlacklist(), 45 } 46 p.init() 47 48 for idx, rule := range cleanRules { 49 if rule.Type == devices.WildcardDevice { 50 // We can safely skip over wildcard entries because there should 51 // only be one (at most) at the very start to instruct cgroupv1 to 52 // go into allow-list mode. However we do double-check this here. 53 if idx != 0 || rule.Allow != emu.IsBlacklist() { 54 return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) 55 } 56 continue 57 } 58 if rule.Allow == p.defaultAllow { 59 // There should be no rules which have an action equal to the 60 // default action, the emulator removes those. 61 return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) 62 } 63 if err := p.appendRule(rule); err != nil { 64 return nil, "", err 65 } 66 } 67 insts, err := p.finalize() 68 return insts, license, err 69} 70 71type program struct { 72 insts asm.Instructions 73 defaultAllow bool 74 blockID int 75} 76 77func (p *program) init() { 78 // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 79 /* 80 u32 access_type 81 u32 major 82 u32 minor 83 */ 84 // R2 <- type (lower 16 bit of u32 access_type at R1[0]) 85 p.insts = append(p.insts, 86 asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), 87 asm.And.Imm32(asm.R2, 0xFFFF)) 88 89 // R3 <- access (upper 16 bit of u32 access_type at R1[0]) 90 p.insts = append(p.insts, 91 asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), 92 // RSh: bitwise shift right 93 asm.RSh.Imm32(asm.R3, 16)) 94 95 // R4 <- major (u32 major at R1[4]) 96 p.insts = append(p.insts, 97 asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) 98 99 // R5 <- minor (u32 minor at R1[8]) 100 p.insts = append(p.insts, 101 asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) 102} 103 104// appendRule rule converts an OCI rule to the relevant eBPF block and adds it 105// to the in-progress filter program. In order to operate properly, it must be 106// called with a "clean" rule list (generated by devices.Emulator.Rules() -- 107// with any "a" rules removed). 108func (p *program) appendRule(rule *devices.Rule) error { 109 if p.blockID < 0 { 110 return errors.New("the program is finalized") 111 } 112 113 var bpfType int32 114 switch rule.Type { 115 case devices.CharDevice: 116 bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) 117 case devices.BlockDevice: 118 bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) 119 default: 120 // We do not permit 'a', nor any other types we don't know about. 121 return errors.Errorf("invalid type %q", string(rule.Type)) 122 } 123 if rule.Major > math.MaxUint32 { 124 return errors.Errorf("invalid major %d", rule.Major) 125 } 126 if rule.Minor > math.MaxUint32 { 127 return errors.Errorf("invalid minor %d", rule.Major) 128 } 129 hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 130 hasMinor := rule.Minor >= 0 131 bpfAccess := int32(0) 132 for _, r := range rule.Permissions { 133 switch r { 134 case 'r': 135 bpfAccess |= unix.BPF_DEVCG_ACC_READ 136 case 'w': 137 bpfAccess |= unix.BPF_DEVCG_ACC_WRITE 138 case 'm': 139 bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD 140 default: 141 return errors.Errorf("unknown device access %v", r) 142 } 143 } 144 // If the access is rwm, skip the check. 145 hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) 146 147 var ( 148 blockSym = "block-" + strconv.Itoa(p.blockID) 149 nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) 150 prevBlockLastIdx = len(p.insts) - 1 151 ) 152 p.insts = append(p.insts, 153 // if (R2 != bpfType) goto next 154 asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), 155 ) 156 if hasAccess { 157 p.insts = append(p.insts, 158 // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next 159 asm.Mov.Reg32(asm.R1, asm.R3), 160 asm.And.Imm32(asm.R1, bpfAccess), 161 asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), 162 ) 163 } 164 if hasMajor { 165 p.insts = append(p.insts, 166 // if (R4 != major) goto next 167 asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), 168 ) 169 } 170 if hasMinor { 171 p.insts = append(p.insts, 172 // if (R5 != minor) goto next 173 asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), 174 ) 175 } 176 p.insts = append(p.insts, acceptBlock(rule.Allow)...) 177 // set blockSym to the first instruction we added in this iteration 178 p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) 179 p.blockID++ 180 return nil 181} 182 183func (p *program) finalize() (asm.Instructions, error) { 184 var v int32 185 if p.defaultAllow { 186 v = 1 187 } 188 blockSym := "block-" + strconv.Itoa(p.blockID) 189 p.insts = append(p.insts, 190 // R0 <- v 191 asm.Mov.Imm32(asm.R0, v).Sym(blockSym), 192 asm.Return(), 193 ) 194 p.blockID = -1 195 return p.insts, nil 196} 197 198func acceptBlock(accept bool) asm.Instructions { 199 var v int32 200 if accept { 201 v = 1 202 } 203 return []asm.Instruction{ 204 // R0 <- v 205 asm.Mov.Imm32(asm.R0, v), 206 asm.Return(), 207 } 208} 209