1// +build linux 2 3package libcontainer 4 5import ( 6 "encoding/json" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "net" 11 "os" 12 "strconv" 13 "strings" 14 "syscall" 15 16 "github.com/Sirupsen/logrus" 17 "github.com/opencontainers/runc/libcontainer/cgroups" 18 "github.com/opencontainers/runc/libcontainer/configs" 19 "github.com/opencontainers/runc/libcontainer/system" 20 "github.com/opencontainers/runc/libcontainer/user" 21 "github.com/opencontainers/runc/libcontainer/utils" 22 "github.com/vishvananda/netlink" 23) 24 25type initType string 26 27const ( 28 initSetns initType = "setns" 29 initStandard initType = "standard" 30) 31 32type pid struct { 33 Pid int `json:"pid"` 34} 35 36// network is an internal struct used to setup container networks. 37type network struct { 38 configs.Network 39 40 // TempVethPeerName is a unique temporary veth peer name that was placed into 41 // the container's namespace. 42 TempVethPeerName string `json:"temp_veth_peer_name"` 43} 44 45// initConfig is used for transferring parameters from Exec() to Init() 46type initConfig struct { 47 Args []string `json:"args"` 48 Env []string `json:"env"` 49 Cwd string `json:"cwd"` 50 Capabilities []string `json:"capabilities"` 51 ProcessLabel string `json:"process_label"` 52 AppArmorProfile string `json:"apparmor_profile"` 53 NoNewPrivileges bool `json:"no_new_privileges"` 54 User string `json:"user"` 55 Config *configs.Config `json:"config"` 56 Console string `json:"console"` 57 Networks []*network `json:"network"` 58 PassedFilesCount int `json:"passed_files_count"` 59 ContainerId string `json:"containerid"` 60 Rlimits []configs.Rlimit `json:"rlimits"` 61} 62 63type initer interface { 64 Init() error 65} 66 67func newContainerInit(t initType, pipe *os.File) (initer, error) { 68 var config *initConfig 69 if err := json.NewDecoder(pipe).Decode(&config); err != nil { 70 return nil, err 71 } 72 if err := populateProcessEnvironment(config.Env); err != nil { 73 return nil, err 74 } 75 switch t { 76 case initSetns: 77 return &linuxSetnsInit{ 78 config: config, 79 }, nil 80 case initStandard: 81 return &linuxStandardInit{ 82 pipe: pipe, 83 parentPid: syscall.Getppid(), 84 config: config, 85 }, nil 86 } 87 return nil, fmt.Errorf("unknown init type %q", t) 88} 89 90// populateProcessEnvironment loads the provided environment variables into the 91// current processes's environment. 92func populateProcessEnvironment(env []string) error { 93 for _, pair := range env { 94 p := strings.SplitN(pair, "=", 2) 95 if len(p) < 2 { 96 return fmt.Errorf("invalid environment '%v'", pair) 97 } 98 if err := os.Setenv(p[0], p[1]); err != nil { 99 return err 100 } 101 } 102 return nil 103} 104 105// finalizeNamespace drops the caps, sets the correct user 106// and working dir, and closes any leaked file descriptors 107// before executing the command inside the namespace 108func finalizeNamespace(config *initConfig) error { 109 // Ensure that all unwanted fds we may have accidentally 110 // inherited are marked close-on-exec so they stay out of the 111 // container 112 if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil { 113 return err 114 } 115 116 capabilities := config.Config.Capabilities 117 if config.Capabilities != nil { 118 capabilities = config.Capabilities 119 } 120 w, err := newCapWhitelist(capabilities) 121 if err != nil { 122 return err 123 } 124 // drop capabilities in bounding set before changing user 125 if err := w.dropBoundingSet(); err != nil { 126 return err 127 } 128 // preserve existing capabilities while we change users 129 if err := system.SetKeepCaps(); err != nil { 130 return err 131 } 132 if err := setupUser(config); err != nil { 133 return err 134 } 135 if err := system.ClearKeepCaps(); err != nil { 136 return err 137 } 138 // drop all other capabilities 139 if err := w.drop(); err != nil { 140 return err 141 } 142 if config.Cwd != "" { 143 if err := syscall.Chdir(config.Cwd); err != nil { 144 return err 145 } 146 } 147 return nil 148} 149 150// syncParentReady sends to the given pipe a JSON payload which indicates that 151// the init is ready to Exec the child process. It then waits for the parent to 152// indicate that it is cleared to Exec. 153func syncParentReady(pipe io.ReadWriter) error { 154 // Tell parent. 155 if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil { 156 return err 157 } 158 // Wait for parent to give the all-clear. 159 var procSync syncT 160 if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { 161 if err == io.EOF { 162 return fmt.Errorf("parent closed synchronisation channel") 163 } 164 if procSync.Type != procRun { 165 return fmt.Errorf("invalid synchronisation flag from parent") 166 } 167 } 168 return nil 169} 170 171// syncParentHooks sends to the given pipe a JSON payload which indicates that 172// the parent should execute pre-start hooks. It then waits for the parent to 173// indicate that it is cleared to resume. 174func syncParentHooks(pipe io.ReadWriter) error { 175 // Tell parent. 176 if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil { 177 return err 178 } 179 // Wait for parent to give the all-clear. 180 var procSync syncT 181 if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { 182 if err == io.EOF { 183 return fmt.Errorf("parent closed synchronisation channel") 184 } 185 if procSync.Type != procResume { 186 return fmt.Errorf("invalid synchronisation flag from parent") 187 } 188 } 189 return nil 190} 191 192// setupUser changes the groups, gid, and uid for the user inside the container 193func setupUser(config *initConfig) error { 194 // Set up defaults. 195 defaultExecUser := user.ExecUser{ 196 Uid: syscall.Getuid(), 197 Gid: syscall.Getgid(), 198 Home: "/", 199 } 200 passwdPath, err := user.GetPasswdPath() 201 if err != nil { 202 return err 203 } 204 groupPath, err := user.GetGroupPath() 205 if err != nil { 206 return err 207 } 208 execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) 209 if err != nil { 210 return err 211 } 212 213 var addGroups []int 214 if len(config.Config.AdditionalGroups) > 0 { 215 addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath) 216 if err != nil { 217 return err 218 } 219 } 220 // before we change to the container's user make sure that the processes STDIO 221 // is correctly owned by the user that we are switching to. 222 if err := fixStdioPermissions(execUser); err != nil { 223 return err 224 } 225 suppGroups := append(execUser.Sgids, addGroups...) 226 if err := syscall.Setgroups(suppGroups); err != nil { 227 return err 228 } 229 230 if err := system.Setgid(execUser.Gid); err != nil { 231 return err 232 } 233 if err := system.Setuid(execUser.Uid); err != nil { 234 return err 235 } 236 // if we didn't get HOME already, set it based on the user's HOME 237 if envHome := os.Getenv("HOME"); envHome == "" { 238 if err := os.Setenv("HOME", execUser.Home); err != nil { 239 return err 240 } 241 } 242 return nil 243} 244 245// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. 246// The ownership needs to match because it is created outside of the container and needs to be 247// localized. 248func fixStdioPermissions(u *user.ExecUser) error { 249 var null syscall.Stat_t 250 if err := syscall.Stat("/dev/null", &null); err != nil { 251 return err 252 } 253 for _, fd := range []uintptr{ 254 os.Stdin.Fd(), 255 os.Stderr.Fd(), 256 os.Stdout.Fd(), 257 } { 258 var s syscall.Stat_t 259 if err := syscall.Fstat(int(fd), &s); err != nil { 260 return err 261 } 262 // skip chown of /dev/null if it was used as one of the STDIO fds. 263 if s.Rdev == null.Rdev { 264 continue 265 } 266 if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil { 267 return err 268 } 269 } 270 return nil 271} 272 273// setupNetwork sets up and initializes any network interface inside the container. 274func setupNetwork(config *initConfig) error { 275 for _, config := range config.Networks { 276 strategy, err := getStrategy(config.Type) 277 if err != nil { 278 return err 279 } 280 if err := strategy.initialize(config); err != nil { 281 return err 282 } 283 } 284 return nil 285} 286 287func setupRoute(config *configs.Config) error { 288 for _, config := range config.Routes { 289 _, dst, err := net.ParseCIDR(config.Destination) 290 if err != nil { 291 return err 292 } 293 src := net.ParseIP(config.Source) 294 if src == nil { 295 return fmt.Errorf("Invalid source for route: %s", config.Source) 296 } 297 gw := net.ParseIP(config.Gateway) 298 if gw == nil { 299 return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) 300 } 301 l, err := netlink.LinkByName(config.InterfaceName) 302 if err != nil { 303 return err 304 } 305 route := &netlink.Route{ 306 Scope: netlink.SCOPE_UNIVERSE, 307 Dst: dst, 308 Src: src, 309 Gw: gw, 310 LinkIndex: l.Attrs().Index, 311 } 312 if err := netlink.RouteAdd(route); err != nil { 313 return err 314 } 315 } 316 return nil 317} 318 319func setupRlimits(limits []configs.Rlimit, pid int) error { 320 for _, rlimit := range limits { 321 if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { 322 return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) 323 } 324 } 325 return nil 326} 327 328func setOomScoreAdj(oomScoreAdj int, pid int) error { 329 path := fmt.Sprintf("/proc/%d/oom_score_adj", pid) 330 331 return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600) 332} 333 334// killCgroupProcesses freezes then iterates over all the processes inside the 335// manager's cgroups sending a SIGKILL to each process then waiting for them to 336// exit. 337func killCgroupProcesses(m cgroups.Manager) error { 338 var procs []*os.Process 339 if err := m.Freeze(configs.Frozen); err != nil { 340 logrus.Warn(err) 341 } 342 pids, err := m.GetAllPids() 343 if err != nil { 344 m.Freeze(configs.Thawed) 345 return err 346 } 347 for _, pid := range pids { 348 p, err := os.FindProcess(pid) 349 if err != nil { 350 logrus.Warn(err) 351 continue 352 } 353 procs = append(procs, p) 354 if err := p.Kill(); err != nil { 355 logrus.Warn(err) 356 } 357 } 358 if err := m.Freeze(configs.Thawed); err != nil { 359 logrus.Warn(err) 360 } 361 for _, p := range procs { 362 if _, err := p.Wait(); err != nil { 363 logrus.Warn(err) 364 } 365 } 366 return nil 367} 368