1package capabilities
2
3import (
4	"fmt"
5	"regexp"
6
7	"github.com/syndtr/gocapability/capability"
8)
9
// HCLSpecLiteral spells out the default capability list as a literal HCL
// list string, so it can be dropped directly into HCL config parsing.
// NomadDefaults is built from this same literal, keeping the two in sync.
const HCLSpecLiteral = `["AUDIT_WRITE","CHOWN","DAC_OVERRIDE","FOWNER","FSETID","KILL","MKNOD","NET_BIND_SERVICE","SETFCAP","SETGID","SETPCAP","SETUID","SYS_CHROOT"]`
15
// extractLiteral matches each run of word characters. NomadDefaults uses it
// to pull the individual capability names out of HCLSpecLiteral.
var extractLiteral = regexp.MustCompile(`([\w]+)`)
19
20// NomadDefaults is the set of Linux capabilities that Nomad enables by
21// default. This list originates from what Docker enabled by default, but then
22// excludes NET_RAW for security reasons.
23//
24// This set is use in the as HCL configuration default, described by HCLSpecLiteral.
25func NomadDefaults() *Set {
26	return New(extractLiteral.FindAllString(HCLSpecLiteral, -1))
27}
28
29// DockerDefaults is a list of Linux capabilities enabled by Docker by default
30// and is used to compute the set of capabilities to add/drop given docker driver
31// configuration.
32//
33// https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities
34func DockerDefaults() *Set {
35	defaults := NomadDefaults()
36	defaults.Add("NET_RAW")
37	return defaults
38}
39
40// Supported returns the set of capabilities supported by the operating system.
41//
42// This set will expand over time as new capabilities are introduced to the kernel
43// and the capability library is updated (which tends to happen to keep up with
44// run-container libraries).
45//
46// Defers to a library generated from
47// https://github.com/torvalds/linux/blob/master/include/uapi/linux/capability.h
48func Supported() *Set {
49	s := New(nil)
50
51	last := capability.CAP_LAST_CAP
52
53	// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
54	if last == capability.Cap(63) {
55		last = capability.CAP_BLOCK_SUSPEND
56	}
57
58	// accumulate every capability supported by this system
59	for _, c := range capability.List() {
60		if c > last {
61			continue
62		}
63		s.Add(c.String())
64	}
65
66	return s
67}
68
69// LegacySupported returns the historical set of capabilities used when a task is
70// configured to run as root using the exec task driver. Older versions of Nomad
71// always allowed the root user to make use of any capability. Now that the exec
72// task driver supports configuring the allowed capabilities, operators are
73// encouraged to explicitly opt-in to capabilities beyond this legacy set. We
74// maintain the legacy list here, because previous versions of Nomad deferred to
75// the capability.List library function, which adds new capabilities over time.
76//
77// https://github.com/hashicorp/nomad/blob/v1.0.4/vendor/github.com/syndtr/gocapability/capability/enum_gen.go#L88
78func LegacySupported() *Set {
79	return New([]string{
80		"CAP_CHOWN",
81		"CAP_DAC_OVERRIDE",
82		"CAP_DAC_READ_SEARCH",
83		"CAP_FOWNER",
84		"CAP_FSETID",
85		"CAP_KILL",
86		"CAP_SETGID",
87		"CAP_SETUID",
88		"CAP_SETPCAP",
89		"CAP_LINUX_IMMUTABLE",
90		"CAP_NET_BIND_SERVICE",
91		"CAP_NET_BROADCAST",
92		"CAP_NET_ADMIN",
93		"CAP_NET_RAW",
94		"CAP_IPC_LOCK",
95		"CAP_IPC_OWNER",
96		"CAP_SYS_MODULE",
97		"CAP_SYS_RAWIO",
98		"CAP_SYS_CHROOT",
99		"CAP_SYS_PTRACE",
100		"CAP_SYS_PACCT",
101		"CAP_SYS_ADMIN",
102		"CAP_SYS_BOOT",
103		"CAP_SYS_NICE",
104		"CAP_SYS_RESOURCE",
105		"CAP_SYS_TIME",
106		"CAP_SYS_TTY_CONFIG",
107		"CAP_MKNOD",
108		"CAP_LEASE",
109		"CAP_AUDIT_WRITE",
110		"CAP_AUDIT_CONTROL",
111		"CAP_SETFCAP",
112		"CAP_MAC_OVERRIDE",
113		"CAP_MAC_ADMIN",
114		"CAP_SYSLOG",
115		"CAP_WAKE_ALARM",
116		"CAP_BLOCK_SUSPEND",
117		"CAP_AUDIT_READ",
118	})
119}
120
121// Calculate the resulting set of linux capabilities to enable for a task, taking
122// into account:
123// - default capability basis
124// - driver allowable capabilities
125// - task capability drops
126// - task capability adds
127//
128// Nomad establishes a standard set of enabled capabilities allowed by the task
129// driver if allow_caps is not set. This is the same set that the task will be
130// enabled with by default if allow_caps does not further reduce permissions,
131// in which case the task capabilities will also be reduced accordingly.
132//
133// The task will drop any capabilities specified in cap_drop, and add back
134// capabilities specified in cap_add. The task will not be allowed to add capabilities
135// not set in the the allow_caps setting (which by default is the same as the basis).
136//
137// cap_add takes precedence over cap_drop, enabling the common pattern of dropping
138// all capabilities, then adding back the desired smaller set. e.g.
139//   cap_drop = ["all"]
140//   cap_add = ["chown", "kill"]
141//
142// Note that the resulting capability names are upper-cased and prefixed with
143// "CAP_", which is the expected input for the exec/java driver implementation.
144func Calculate(basis *Set, allowCaps, capAdd, capDrop []string) ([]string, error) {
145	allow := New(allowCaps)
146	adds := New(capAdd)
147
148	// determine caps the task wants that are not allowed
149	missing := allow.Difference(adds)
150	if !missing.Empty() {
151		return nil, fmt.Errorf("driver does not allow the following capabilities: %s", missing)
152	}
153
154	// the realized enabled capabilities starts with what is allowed both by driver
155	// config AND is a member of the basis (i.e. nomad defaults)
156	result := basis.Intersect(allow)
157
158	// then remove capabilities the task explicitly drops
159	result.Remove(capDrop)
160
161	// then add back capabilities the task explicitly adds
162	return result.Union(adds).Slice(true), nil
163}
164
165// Delta calculates the set of capabilities that must be added and dropped relative
166// to a basis to achieve a desired result. The use case is that the docker driver
167// assumes a default set (DockerDefault), and we must calculate what to pass into
168// --cap-add and --cap-drop on container creation given the inputs of the docker
169// plugin config for allow_caps, and the docker task configuration for cap_add and
170// cap_drop. Note that the user provided cap_add and cap_drop settings are always
171// included, even if they are redundant with the basis (maintaining existing
172// behavior, working with existing tests).
173//
174// Note that the resulting capability names are lower-cased and not prefixed with
175// "CAP_", which is the existing style used with the docker driver implementation.
176func Delta(basis *Set, allowCaps, capAdd, capDrop []string) ([]string, []string, error) {
177	all := func(caps []string) bool {
178		for _, c := range caps {
179			if normalize(c) == "all" {
180				return true
181			}
182		}
183		return false
184	}
185
186	// set of caps allowed by driver
187	allow := New(allowCaps)
188
189	// determine caps the task wants that are not allowed
190	missing := allow.Difference(New(capAdd))
191	if !missing.Empty() {
192		return nil, nil, fmt.Errorf("driver does not allow the following capabilities: %s", missing)
193	}
194
195	// add what the task is asking for
196	add := New(capAdd).Slice(false)
197	if all(capAdd) {
198		add = []string{"all"}
199	}
200
201	// drop what the task removes plus whatever is in the basis that is not
202	// in the driver allow configuration
203	drop := New(allowCaps).Difference(basis).Union(New(capDrop)).Slice(false)
204	if all(capDrop) {
205		drop = []string{"all"}
206	}
207
208	return add, drop, nil
209}
210