1package configs 2 3import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "os/exec" 8 "time" 9 10 "github.com/Sirupsen/logrus" 11) 12 13type Rlimit struct { 14 Type int `json:"type"` 15 Hard uint64 `json:"hard"` 16 Soft uint64 `json:"soft"` 17} 18 19// IDMap represents UID/GID Mappings for User Namespaces. 20type IDMap struct { 21 ContainerID int `json:"container_id"` 22 HostID int `json:"host_id"` 23 Size int `json:"size"` 24} 25 26// Seccomp represents syscall restrictions 27// By default, only the native architecture of the kernel is allowed to be used 28// for syscalls. Additional architectures can be added by specifying them in 29// Architectures. 30type Seccomp struct { 31 DefaultAction Action `json:"default_action"` 32 Architectures []string `json:"architectures"` 33 Syscalls []*Syscall `json:"syscalls"` 34} 35 36// An action to be taken upon rule match in Seccomp 37type Action int 38 39const ( 40 Kill Action = iota + 1 41 Errno 42 Trap 43 Allow 44 Trace 45) 46 47// A comparison operator to be used when matching syscall arguments in Seccomp 48type Operator int 49 50const ( 51 EqualTo Operator = iota + 1 52 NotEqualTo 53 GreaterThan 54 GreaterThanOrEqualTo 55 LessThan 56 LessThanOrEqualTo 57 MaskEqualTo 58) 59 60// A rule to match a specific syscall argument in Seccomp 61type Arg struct { 62 Index uint `json:"index"` 63 Value uint64 `json:"value"` 64 ValueTwo uint64 `json:"value_two"` 65 Op Operator `json:"op"` 66} 67 68// An rule to match a syscall in Seccomp 69type Syscall struct { 70 Name string `json:"name"` 71 Action Action `json:"action"` 72 Args []*Arg `json:"args"` 73} 74 75// TODO Windows. Many of these fields should be factored out into those parts 76// which are common across platforms, and those which are platform specific. 77 78// Config defines configuration options for executing a process inside a contained environment. 
79type Config struct { 80 // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs 81 // This is a common option when the container is running in ramdisk 82 NoPivotRoot bool `json:"no_pivot_root"` 83 84 // ParentDeathSignal specifies the signal that is sent to the container's process in the case 85 // that the parent process dies. 86 ParentDeathSignal int `json:"parent_death_signal"` 87 88 // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. 89 // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. 90 // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. 91 PivotDir string `json:"pivot_dir"` 92 93 // Path to a directory containing the container's root filesystem. 94 Rootfs string `json:"rootfs"` 95 96 // Readonlyfs will remount the container's rootfs as readonly where only externally mounted 97 // bind mounts are writtable. 98 Readonlyfs bool `json:"readonlyfs"` 99 100 // Specifies the mount propagation flags to be applied to /. 101 RootPropagation int `json:"rootPropagation"` 102 103 // Mounts specify additional source and destination paths that will be mounted inside the container's 104 // rootfs and mount namespace if specified 105 Mounts []*Mount `json:"mounts"` 106 107 // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
108 Devices []*Device `json:"devices"` 109 110 MountLabel string `json:"mount_label"` 111 112 // Hostname optionally sets the container's hostname if provided 113 Hostname string `json:"hostname"` 114 115 // Namespaces specifies the container's namespaces that it should setup when cloning the init process 116 // If a namespace is not provided that namespace is shared from the container's parent process 117 Namespaces Namespaces `json:"namespaces"` 118 119 // Capabilities specify the capabilities to keep when executing the process inside the container 120 // All capbilities not specified will be dropped from the processes capability mask 121 Capabilities []string `json:"capabilities"` 122 123 // Networks specifies the container's network setup to be created 124 Networks []*Network `json:"networks"` 125 126 // Routes can be specified to create entries in the route table as the container is started 127 Routes []*Route `json:"routes"` 128 129 // Cgroups specifies specific cgroup settings for the various subsystems that the container is 130 // placed into to limit the resources the container has available 131 Cgroups *Cgroup `json:"cgroups"` 132 133 // AppArmorProfile specifies the profile to apply to the process running in the container and is 134 // change at the time the process is execed 135 AppArmorProfile string `json:"apparmor_profile,omitempty"` 136 137 // ProcessLabel specifies the label to apply to the process running in the container. It is 138 // commonly used by selinux 139 ProcessLabel string `json:"process_label,omitempty"` 140 141 // Rlimits specifies the resource limits, such as max open files, to set in the container 142 // If Rlimits are not set, the container will inherit rlimits from the parent process 143 Rlimits []Rlimit `json:"rlimits,omitempty"` 144 145 // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores 146 // for a process. 
Valid values are between the range [-1000, '1000'], where processes with 147 // higher scores are preferred for being killed. 148 // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ 149 OomScoreAdj int `json:"oom_score_adj"` 150 151 // AdditionalGroups specifies the gids that should be added to supplementary groups 152 // in addition to those that the user belongs to. 153 AdditionalGroups []string `json:"additional_groups"` 154 155 // UidMappings is an array of User ID mappings for User Namespaces 156 UidMappings []IDMap `json:"uid_mappings"` 157 158 // GidMappings is an array of Group ID mappings for User Namespaces 159 GidMappings []IDMap `json:"gid_mappings"` 160 161 // MaskPaths specifies paths within the container's rootfs to mask over with a bind 162 // mount pointing to /dev/null as to prevent reads of the file. 163 MaskPaths []string `json:"mask_paths"` 164 165 // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only 166 // so that these files prevent any writes. 167 ReadonlyPaths []string `json:"readonly_paths"` 168 169 // Sysctl is a map of properties and their values. It is the equivalent of using 170 // sysctl -w my.property.name value in Linux. 171 Sysctl map[string]string `json:"sysctl"` 172 173 // Seccomp allows actions to be taken whenever a syscall is made within the container. 174 // A number of rules are given, each having an action to be taken if a syscall matches it. 175 // A default action to be taken if no rules match is also given. 176 Seccomp *Seccomp `json:"seccomp"` 177 178 // NoNewPrivileges controls whether processes in the container can gain additional privileges. 179 NoNewPrivileges bool `json:"no_new_privileges,omitempty"` 180 181 // Hooks are a collection of actions to perform at various container lifecycle events. 182 // CommandHooks are serialized to JSON, but other hooks are not. 
183 Hooks *Hooks 184 185 // Version is the version of opencontainer specification that is supported. 186 Version string `json:"version"` 187 188 // Labels are user defined metadata that is stored in the config and populated on the state 189 Labels []string `json:"labels"` 190} 191 192type Hooks struct { 193 // Prestart commands are executed after the container namespaces are created, 194 // but before the user supplied command is executed from init. 195 Prestart []Hook 196 197 // Poststart commands are executed after the container init process starts. 198 Poststart []Hook 199 200 // Poststop commands are executed after the container init process exits. 201 Poststop []Hook 202} 203 204func (hooks *Hooks) UnmarshalJSON(b []byte) error { 205 var state struct { 206 Prestart []CommandHook 207 Poststart []CommandHook 208 Poststop []CommandHook 209 } 210 211 if err := json.Unmarshal(b, &state); err != nil { 212 return err 213 } 214 215 deserialize := func(shooks []CommandHook) (hooks []Hook) { 216 for _, shook := range shooks { 217 hooks = append(hooks, shook) 218 } 219 220 return hooks 221 } 222 223 hooks.Prestart = deserialize(state.Prestart) 224 hooks.Poststart = deserialize(state.Poststart) 225 hooks.Poststop = deserialize(state.Poststop) 226 return nil 227} 228 229func (hooks Hooks) MarshalJSON() ([]byte, error) { 230 serialize := func(hooks []Hook) (serializableHooks []CommandHook) { 231 for _, hook := range hooks { 232 switch chook := hook.(type) { 233 case CommandHook: 234 serializableHooks = append(serializableHooks, chook) 235 default: 236 logrus.Warnf("cannot serialize hook of type %T, skipping", hook) 237 } 238 } 239 240 return serializableHooks 241 } 242 243 return json.Marshal(map[string]interface{}{ 244 "prestart": serialize(hooks.Prestart), 245 "poststart": serialize(hooks.Poststart), 246 "poststop": serialize(hooks.Poststop), 247 }) 248} 249 250// HookState is the payload provided to a hook on execution. 
type HookState struct {
	Version    string `json:"ociVersion"`
	ID         string `json:"id"`
	Pid        int    `json:"pid"`
	Root       string `json:"root"`
	BundlePath string `json:"bundlePath"`
}

// Hook is an action that can be run with the state of a container.
type Hook interface {
	// Run executes the hook with the provided state.
	Run(HookState) error
}

// NewFunctionHook returns a hook that calls f when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
	return FuncHook{
		run: f,
	}
}

// FuncHook is a Hook backed by a Go function.
type FuncHook struct {
	run func(HookState) error
}

// Run calls the wrapped function with s and returns its result.
func (f FuncHook) Run(s HookState) error {
	return f.run(s)
}

// Command describes an external program to execute as a hook.
type Command struct {
	Path    string         `json:"path"`
	Args    []string       `json:"args"`
	Env     []string       `json:"env"`
	Dir     string         `json:"dir"`
	Timeout *time.Duration `json:"timeout"`
}

// NewCommandHook returns a hook that executes cmd when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
	return CommandHook{
		Command: cmd,
	}
}

// CommandHook is a Hook that runs an external command.
type CommandHook struct {
	Command
}

// Run executes the command as a hook. The state s is marshaled to JSON and
// supplied on the command's standard input. Standard output and standard
// error are captured together and appended to the returned error when the
// command fails to start or exits unsuccessfully.
//
// If Timeout is set and elapses before the command finishes, the process is
// killed, reaped, and a timeout error is returned.
//
// NOTE(review): Dir is not applied to the spawned process here.
func (c Command) Run(s HookState) error {
	b, err := json.Marshal(s)
	if err != nil {
		return err
	}

	// Sharing one buffer between Stdout and Stderr reproduces the interleaved
	// capture of CombinedOutput.
	var out bytes.Buffer
	cmd := exec.Cmd{
		Path:   c.Path,
		Args:   c.Args,
		Env:    c.Env,
		Stdin:  bytes.NewReader(b),
		Stdout: &out,
		Stderr: &out,
	}

	// Start synchronously so that cmd.Process is known to be set before the
	// timeout branch below can touch it.
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("%s: %s", err, out.Bytes())
	}

	// The goroutine is the only caller of Wait; the buffered channel lets it
	// finish even if nobody receives.
	errC := make(chan error, 1)
	go func() {
		err := cmd.Wait()
		if err != nil {
			err = fmt.Errorf("%s: %s", err, out.Bytes())
		}
		errC <- err
	}()

	if c.Timeout == nil {
		return <-errC
	}

	timer := time.NewTimer(*c.Timeout)
	defer timer.Stop()

	select {
	case err := <-errC:
		return err
	case <-timer.C:
		// Best effort: the process may have exited between the timer firing
		// and this call, in which case Kill reports an error we can ignore.
		_ = cmd.Process.Kill()
		// Let the waiting goroutine reap the process instead of calling Wait
		// a second time.
		<-errC
		return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
	}
}