1package configs
2
3import (
4	"bytes"
5	"encoding/json"
6	"fmt"
7	"os/exec"
8	"time"
9
10	"github.com/Sirupsen/logrus"
11)
12
// Rlimit is a single resource-limit entry as listed in Config.Rlimits.
type Rlimit struct {
	// Type selects the resource the limit applies to.
	// NOTE(review): presumably an RLIMIT_* number; confirm against the code
	// that applies the limits.
	Type int `json:"type"`

	// Hard is the hard value of the limit.
	Hard uint64 `json:"hard"`

	// Soft is the soft value of the limit.
	Soft uint64 `json:"soft"`
}
18
// IDMap is one UID or GID mapping entry for user namespaces: a range of Size
// IDs starting at ContainerID that corresponds to the range starting at HostID.
type IDMap struct {
	// ContainerID is the first ID of the range as seen inside the container.
	ContainerID int `json:"container_id"`

	// HostID is the first ID of the matching range on the host.
	HostID int `json:"host_id"`

	// Size is the number of IDs covered by the mapping.
	Size int `json:"size"`
}
25
26// Seccomp represents syscall restrictions
27// By default, only the native architecture of the kernel is allowed to be used
28// for syscalls. Additional architectures can be added by specifying them in
29// Architectures.
30type Seccomp struct {
31	DefaultAction Action     `json:"default_action"`
32	Architectures []string   `json:"architectures"`
33	Syscalls      []*Syscall `json:"syscalls"`
34}
35
// Action is what Seccomp does when a rule matches, or, for
// Seccomp.DefaultAction, when no rule matches.
type Action int

// The known actions. They are encoded in JSON as plain integers, so the
// numeric values are spelled out; they start at 1, which leaves the zero value
// of Action without a named constant.
const (
	Kill  Action = 1
	Errno Action = 2
	Trap  Action = 3
	Allow Action = 4
	Trace Action = 5
)
46
// Operator is the comparison used when matching a syscall argument in Seccomp.
type Operator int

// The known comparison operators. They are encoded in JSON as plain integers,
// so the numeric values are spelled out; they start at 1, which leaves the
// zero value of Operator without a named constant.
const (
	EqualTo              Operator = 1
	NotEqualTo           Operator = 2
	GreaterThan          Operator = 3
	GreaterThanOrEqualTo Operator = 4
	LessThan             Operator = 5
	LessThanOrEqualTo    Operator = 6
	MaskEqualTo          Operator = 7
)
59
60// A rule to match a specific syscall argument in Seccomp
61type Arg struct {
62	Index    uint     `json:"index"`
63	Value    uint64   `json:"value"`
64	ValueTwo uint64   `json:"value_two"`
65	Op       Operator `json:"op"`
66}
67
68// An rule to match a syscall in Seccomp
69type Syscall struct {
70	Name   string `json:"name"`
71	Action Action `json:"action"`
72	Args   []*Arg `json:"args"`
73}
74
// TODO(Windows): many of these fields should be split into a part that is
// common across platforms and parts that are platform specific.
77
78// Config defines configuration options for executing a process inside a contained environment.
79type Config struct {
80	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
81	// This is a common option when the container is running in ramdisk
82	NoPivotRoot bool `json:"no_pivot_root"`
83
84	// ParentDeathSignal specifies the signal that is sent to the container's process in the case
85	// that the parent process dies.
86	ParentDeathSignal int `json:"parent_death_signal"`
87
88	// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
89	// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
90	// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
91	PivotDir string `json:"pivot_dir"`
92
93	// Path to a directory containing the container's root filesystem.
94	Rootfs string `json:"rootfs"`
95
96	// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
97	// bind mounts are writtable.
98	Readonlyfs bool `json:"readonlyfs"`
99
100	// Specifies the mount propagation flags to be applied to /.
101	RootPropagation int `json:"rootPropagation"`
102
103	// Mounts specify additional source and destination paths that will be mounted inside the container's
104	// rootfs and mount namespace if specified
105	Mounts []*Mount `json:"mounts"`
106
107	// The device nodes that should be automatically created within the container upon container start.  Note, make sure that the node is marked as allowed in the cgroup as well!
108	Devices []*Device `json:"devices"`
109
110	MountLabel string `json:"mount_label"`
111
112	// Hostname optionally sets the container's hostname if provided
113	Hostname string `json:"hostname"`
114
115	// Namespaces specifies the container's namespaces that it should setup when cloning the init process
116	// If a namespace is not provided that namespace is shared from the container's parent process
117	Namespaces Namespaces `json:"namespaces"`
118
119	// Capabilities specify the capabilities to keep when executing the process inside the container
120	// All capbilities not specified will be dropped from the processes capability mask
121	Capabilities []string `json:"capabilities"`
122
123	// Networks specifies the container's network setup to be created
124	Networks []*Network `json:"networks"`
125
126	// Routes can be specified to create entries in the route table as the container is started
127	Routes []*Route `json:"routes"`
128
129	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
130	// placed into to limit the resources the container has available
131	Cgroups *Cgroup `json:"cgroups"`
132
133	// AppArmorProfile specifies the profile to apply to the process running in the container and is
134	// change at the time the process is execed
135	AppArmorProfile string `json:"apparmor_profile,omitempty"`
136
137	// ProcessLabel specifies the label to apply to the process running in the container.  It is
138	// commonly used by selinux
139	ProcessLabel string `json:"process_label,omitempty"`
140
141	// Rlimits specifies the resource limits, such as max open files, to set in the container
142	// If Rlimits are not set, the container will inherit rlimits from the parent process
143	Rlimits []Rlimit `json:"rlimits,omitempty"`
144
145	// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
146	// for a process. Valid values are between the range [-1000, '1000'], where processes with
147	// higher scores are preferred for being killed.
148	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
149	OomScoreAdj int `json:"oom_score_adj"`
150
151	// AdditionalGroups specifies the gids that should be added to supplementary groups
152	// in addition to those that the user belongs to.
153	AdditionalGroups []string `json:"additional_groups"`
154
155	// UidMappings is an array of User ID mappings for User Namespaces
156	UidMappings []IDMap `json:"uid_mappings"`
157
158	// GidMappings is an array of Group ID mappings for User Namespaces
159	GidMappings []IDMap `json:"gid_mappings"`
160
161	// MaskPaths specifies paths within the container's rootfs to mask over with a bind
162	// mount pointing to /dev/null as to prevent reads of the file.
163	MaskPaths []string `json:"mask_paths"`
164
165	// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
166	// so that these files prevent any writes.
167	ReadonlyPaths []string `json:"readonly_paths"`
168
169	// Sysctl is a map of properties and their values. It is the equivalent of using
170	// sysctl -w my.property.name value in Linux.
171	Sysctl map[string]string `json:"sysctl"`
172
173	// Seccomp allows actions to be taken whenever a syscall is made within the container.
174	// A number of rules are given, each having an action to be taken if a syscall matches it.
175	// A default action to be taken if no rules match is also given.
176	Seccomp *Seccomp `json:"seccomp"`
177
178	// NoNewPrivileges controls whether processes in the container can gain additional privileges.
179	NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
180
181	// Hooks are a collection of actions to perform at various container lifecycle events.
182	// CommandHooks are serialized to JSON, but other hooks are not.
183	Hooks *Hooks
184
185	// Version is the version of opencontainer specification that is supported.
186	Version string `json:"version"`
187
188	// Labels are user defined metadata that is stored in the config and populated on the state
189	Labels []string `json:"labels"`
190}
191
192type Hooks struct {
193	// Prestart commands are executed after the container namespaces are created,
194	// but before the user supplied command is executed from init.
195	Prestart []Hook
196
197	// Poststart commands are executed after the container init process starts.
198	Poststart []Hook
199
200	// Poststop commands are executed after the container init process exits.
201	Poststop []Hook
202}
203
204func (hooks *Hooks) UnmarshalJSON(b []byte) error {
205	var state struct {
206		Prestart  []CommandHook
207		Poststart []CommandHook
208		Poststop  []CommandHook
209	}
210
211	if err := json.Unmarshal(b, &state); err != nil {
212		return err
213	}
214
215	deserialize := func(shooks []CommandHook) (hooks []Hook) {
216		for _, shook := range shooks {
217			hooks = append(hooks, shook)
218		}
219
220		return hooks
221	}
222
223	hooks.Prestart = deserialize(state.Prestart)
224	hooks.Poststart = deserialize(state.Poststart)
225	hooks.Poststop = deserialize(state.Poststop)
226	return nil
227}
228
229func (hooks Hooks) MarshalJSON() ([]byte, error) {
230	serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
231		for _, hook := range hooks {
232			switch chook := hook.(type) {
233			case CommandHook:
234				serializableHooks = append(serializableHooks, chook)
235			default:
236				logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
237			}
238		}
239
240		return serializableHooks
241	}
242
243	return json.Marshal(map[string]interface{}{
244		"prestart":  serialize(hooks.Prestart),
245		"poststart": serialize(hooks.Poststart),
246		"poststop":  serialize(hooks.Poststop),
247	})
248}
249
// HookState is the payload handed to a hook when it is executed.
type HookState struct {
	// Version is encoded under the JSON key "ociVersion".
	Version string `json:"ociVersion"`

	// ID is encoded under the JSON key "id".
	// NOTE(review): presumably the container ID; confirm with the caller that
	// fills in the state.
	ID string `json:"id"`

	// Pid is encoded under the JSON key "pid".
	// NOTE(review): presumably the pid of the container's init process;
	// confirm with the caller.
	Pid int `json:"pid"`

	// Root is encoded under the JSON key "root".
	Root string `json:"root"`

	// BundlePath is encoded under the JSON key "bundlePath".
	BundlePath string `json:"bundlePath"`
}
258
259type Hook interface {
260	// Run executes the hook with the provided state.
261	Run(HookState) error
262}
263
264// NewFunctionHooks will call the provided function when the hook is run.
265func NewFunctionHook(f func(HookState) error) FuncHook {
266	return FuncHook{
267		run: f,
268	}
269}
270
271type FuncHook struct {
272	run func(HookState) error
273}
274
275func (f FuncHook) Run(s HookState) error {
276	return f.run(s)
277}
278
// Command describes an external program that is run as a hook.
type Command struct {
	// Path is used as the Path of the process that Run starts.
	Path string `json:"path"`

	// Args is used as the Args of the process that Run starts.
	Args []string `json:"args"`

	// Env is used as the Env of the process that Run starts.
	Env []string `json:"env"`

	// Dir is encoded under the JSON key "dir".
	// NOTE(review): Run in this file does not hand Dir to the process it
	// starts; confirm whether that is intended.
	Dir string `json:"dir"`

	// Timeout, when non-nil, limits how long Run lets the command execute
	// before the process is killed.
	Timeout *time.Duration `json:"timeout"`
}
286
287// NewCommandHooks will execute the provided command when the hook is run.
288func NewCommandHook(cmd Command) CommandHook {
289	return CommandHook{
290		Command: cmd,
291	}
292}
293
294type CommandHook struct {
295	Command
296}
297
298func (c Command) Run(s HookState) error {
299	b, err := json.Marshal(s)
300	if err != nil {
301		return err
302	}
303	cmd := exec.Cmd{
304		Path:  c.Path,
305		Args:  c.Args,
306		Env:   c.Env,
307		Stdin: bytes.NewReader(b),
308	}
309	errC := make(chan error, 1)
310	go func() {
311		out, err := cmd.CombinedOutput()
312		if err != nil {
313			err = fmt.Errorf("%s: %s", err, out)
314		}
315		errC <- err
316	}()
317	if c.Timeout != nil {
318		select {
319		case err := <-errC:
320			return err
321		case <-time.After(*c.Timeout):
322			cmd.Process.Kill()
323			cmd.Wait()
324			return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
325		}
326	}
327	return <-errC
328}
329