1// +build !windows 2 3/* 4 Copyright The containerd Authors. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17*/ 18 19package run 20 21import ( 22 gocontext "context" 23 "fmt" 24 "path/filepath" 25 "strconv" 26 "strings" 27 28 "github.com/containerd/containerd" 29 "github.com/containerd/containerd/cmd/ctr/commands" 30 "github.com/containerd/containerd/contrib/apparmor" 31 "github.com/containerd/containerd/contrib/nvidia" 32 "github.com/containerd/containerd/contrib/seccomp" 33 "github.com/containerd/containerd/oci" 34 runtimeoptions "github.com/containerd/containerd/pkg/runtimeoptions/v1" 35 "github.com/containerd/containerd/platforms" 36 "github.com/containerd/containerd/runtime/v2/runc/options" 37 "github.com/opencontainers/runtime-spec/specs-go" 38 "github.com/pkg/errors" 39 "github.com/sirupsen/logrus" 40 "github.com/urfave/cli" 41) 42 43var platformRunFlags = []cli.Flag{ 44 cli.StringFlag{ 45 Name: "runc-binary", 46 Usage: "specify runc-compatible binary", 47 }, 48 cli.StringFlag{ 49 Name: "runc-root", 50 Usage: "specify runc-compatible root", 51 }, 52 cli.BoolFlag{ 53 Name: "runc-systemd-cgroup", 54 Usage: "start runc with systemd cgroup manager", 55 }, 56 cli.StringFlag{ 57 Name: "uidmap", 58 Usage: "run inside a user namespace with the specified UID mapping range; specified with the format `container-uid:host-uid:length`", 59 }, 60 cli.StringFlag{ 61 Name: "gidmap", 62 Usage: "run inside a user namespace with the specified GID mapping range; specified with the format `container-gid:host-gid:length`", 63 }, 64 cli.BoolFlag{ 65 Name: "remap-labels", 66 Usage: "provide the user namespace ID remapping to the snapshotter via label options; requires snapshotter support", 67 }, 68 cli.Float64Flag{ 69 Name: "cpus", 70 Usage: "set the CFS cpu quota", 71 Value: 0.0, 72 }, 73 cli.BoolFlag{ 74 Name: "cni", 75 Usage: "enable cni networking for the container", 76 }, 77} 78 79// NewContainer creates a new container 80func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli.Context) (containerd.Container, error) { 81 var ( 82 id string 83 config = context.IsSet("config") 84 ) 85 if config { 86 id = context.Args().First() 87 } else { 88 id = context.Args().Get(1) 89 } 90 91 var ( 92 opts []oci.SpecOpts 93 cOpts []containerd.NewContainerOpts 94 spec containerd.NewContainerOpts 95 ) 96 97 cOpts = append(cOpts, containerd.WithContainerLabels(commands.LabelArgs(context.StringSlice("label")))) 98 if config { 99 opts = append(opts, oci.WithSpecFromFile(context.String("config"))) 100 } else { 101 var ( 102 ref = context.Args().First() 103 //for container's id is Args[1] 104 args = context.Args()[2:] 105 ) 106 opts = append(opts, oci.WithDefaultSpec(), oci.WithDefaultUnixDevices) 107 if ef := context.String("env-file"); ef != "" { 108 opts = append(opts, oci.WithEnvFile(ef)) 109 } 110 opts = append(opts, oci.WithEnv(context.StringSlice("env"))) 111 opts = append(opts, withMounts(context)) 112 113 if context.Bool("rootfs") { 114 rootfs, err := filepath.Abs(ref) 115 if err != nil { 116 return nil, err 117 } 118 opts = append(opts, oci.WithRootFSPath(rootfs)) 119 } else { 120 snapshotter := context.String("snapshotter") 121 var image containerd.Image 122 i, err := client.ImageService().Get(ctx, ref) 123 if err != nil { 124 return nil, err 125 } 126 if ps := context.String("platform"); ps != "" { 127 platform, err := platforms.Parse(ps) 128 if err != nil { 129 return nil, err 130 } 131 image = containerd.NewImageWithPlatform(client, i, platforms.Only(platform)) 132 } else { 133 image = containerd.NewImage(client, i) 134 } 135 136 unpacked, err := image.IsUnpacked(ctx, snapshotter) 137 if err != nil { 138 return nil, err 139 } 140 if !unpacked { 141 if err := image.Unpack(ctx, snapshotter); err != nil { 142 return nil, err 143 } 144 } 145 opts = append(opts, oci.WithImageConfig(image)) 146 cOpts = append(cOpts, 147 containerd.WithImage(image), 148 containerd.WithSnapshotter(snapshotter)) 149 if uidmap, gidmap := context.String("uidmap"), context.String("gidmap"); uidmap != "" && gidmap != "" { 150 uidMap, err := parseIDMapping(uidmap) 151 if err != nil { 152 return nil, err 153 } 154 gidMap, err := parseIDMapping(gidmap) 155 if err != nil { 156 return nil, err 157 } 158 opts = append(opts, 159 oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap})) 160 // use snapshotter opts or the remapped snapshot support to shift the filesystem 161 // currently the only snapshotter known to support the labels is fuse-overlayfs: 162 // https://github.com/AkihiroSuda/containerd-fuse-overlayfs 163 if context.Bool("remap-labels") { 164 cOpts = append(cOpts, containerd.WithNewSnapshot(id, image, 165 containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size))) 166 } else { 167 cOpts = append(cOpts, containerd.WithRemappedSnapshot(id, image, uidMap.HostID, gidMap.HostID)) 168 } 169 } else { 170 // Even when "read-only" is set, we don't use KindView snapshot here. (#1495) 171 // We pass writable snapshot to the OCI runtime, and the runtime remounts it as read-only, 172 // after creating some mount points on demand. 173 cOpts = append(cOpts, containerd.WithNewSnapshot(id, image)) 174 } 175 cOpts = append(cOpts, containerd.WithImageStopSignal(image, "SIGTERM")) 176 } 177 if context.Bool("read-only") { 178 opts = append(opts, oci.WithRootFSReadonly()) 179 } 180 if len(args) > 0 { 181 opts = append(opts, oci.WithProcessArgs(args...)) 182 } 183 if cwd := context.String("cwd"); cwd != "" { 184 opts = append(opts, oci.WithProcessCwd(cwd)) 185 } 186 if context.Bool("tty") { 187 opts = append(opts, oci.WithTTY) 188 } 189 if context.Bool("privileged") { 190 opts = append(opts, oci.WithPrivileged, oci.WithAllDevicesAllowed, oci.WithHostDevices) 191 } 192 if context.Bool("net-host") { 193 opts = append(opts, oci.WithHostNamespace(specs.NetworkNamespace), oci.WithHostHostsFile, oci.WithHostResolvconf) 194 } 195 196 seccompProfile := context.String("seccomp-profile") 197 198 if !context.Bool("seccomp") && seccompProfile != "" { 199 return nil, fmt.Errorf("seccomp must be set to true, if using a custom seccomp-profile") 200 } 201 202 if context.Bool("seccomp") { 203 if seccompProfile != "" { 204 opts = append(opts, seccomp.WithProfile(seccompProfile)) 205 } else { 206 opts = append(opts, seccomp.WithDefaultProfile()) 207 } 208 } 209 210 if s := context.String("apparmor-default-profile"); len(s) > 0 { 211 opts = append(opts, apparmor.WithDefaultProfile(s)) 212 } 213 214 if s := context.String("apparmor-profile"); len(s) > 0 { 215 if len(context.String("apparmor-default-profile")) > 0 { 216 return nil, fmt.Errorf("apparmor-profile conflicts with apparmor-default-profile") 217 } 218 opts = append(opts, apparmor.WithProfile(s)) 219 } 220 221 if cpus := context.Float64("cpus"); cpus > 0.0 { 222 var ( 223 period = uint64(100000) 224 quota = int64(cpus * 100000.0) 225 ) 226 opts = append(opts, oci.WithCPUCFS(quota, period)) 227 } 228 229 quota := context.Int64("cpu-quota") 230 period := context.Uint64("cpu-period") 231 if quota != -1 || period != 0 { 232 if cpus := context.Float64("cpus"); cpus > 0.0 { 233 return nil, errors.New("cpus and quota/period should be used separately") 234 } 235 opts = append(opts, oci.WithCPUCFS(quota, period)) 236 } 237 238 joinNs := context.StringSlice("with-ns") 239 for _, ns := range joinNs { 240 parts := strings.Split(ns, ":") 241 if len(parts) != 2 { 242 return nil, errors.New("joining a Linux namespace using --with-ns requires the format 'nstype:path'") 243 } 244 if !validNamespace(parts[0]) { 245 return nil, errors.New("the Linux namespace type specified in --with-ns is not valid: " + parts[0]) 246 } 247 opts = append(opts, oci.WithLinuxNamespace(specs.LinuxNamespace{ 248 Type: specs.LinuxNamespaceType(parts[0]), 249 Path: parts[1], 250 })) 251 } 252 if context.IsSet("gpus") { 253 opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities)) 254 } 255 if context.IsSet("allow-new-privs") { 256 opts = append(opts, oci.WithNewPrivileges) 257 } 258 if context.IsSet("cgroup") { 259 // NOTE: can be set to "" explicitly for disabling cgroup. 260 opts = append(opts, oci.WithCgroup(context.String("cgroup"))) 261 } 262 limit := context.Uint64("memory-limit") 263 if limit != 0 { 264 opts = append(opts, oci.WithMemoryLimit(limit)) 265 } 266 for _, dev := range context.StringSlice("device") { 267 opts = append(opts, oci.WithLinuxDevice(dev, "rwm")) 268 } 269 } 270 271 runtimeOpts, err := getRuntimeOptions(context) 272 if err != nil { 273 return nil, err 274 } 275 cOpts = append(cOpts, containerd.WithRuntime(context.String("runtime"), runtimeOpts)) 276 277 opts = append(opts, oci.WithAnnotations(commands.LabelArgs(context.StringSlice("label")))) 278 var s specs.Spec 279 spec = containerd.WithSpec(&s, opts...) 280 281 cOpts = append(cOpts, spec) 282 283 // oci.WithImageConfig (WithUsername, WithUserID) depends on access to rootfs for resolving via 284 // the /etc/{passwd,group} files. So cOpts needs to have precedence over opts. 285 return client.NewContainer(ctx, id, cOpts...) 286} 287 288func getRuncOptions(context *cli.Context) (*options.Options, error) { 289 runtimeOpts := &options.Options{} 290 if runcBinary := context.String("runc-binary"); runcBinary != "" { 291 runtimeOpts.BinaryName = runcBinary 292 } 293 if context.Bool("runc-systemd-cgroup") { 294 if context.String("cgroup") == "" { 295 // runc maps "machine.slice:foo:deadbeef" to "/machine.slice/foo-deadbeef.scope" 296 return nil, errors.New("option --runc-systemd-cgroup requires --cgroup to be set, e.g. \"machine.slice:foo:deadbeef\"") 297 } 298 runtimeOpts.SystemdCgroup = true 299 } 300 if root := context.String("runc-root"); root != "" { 301 runtimeOpts.Root = root 302 } 303 304 return runtimeOpts, nil 305} 306 307func getRuntimeOptions(context *cli.Context) (interface{}, error) { 308 // validate first 309 if (context.String("runc-binary") != "" || context.Bool("runc-systemd-cgroup")) && 310 context.String("runtime") != "io.containerd.runc.v2" { 311 return nil, errors.New("specifying runc-binary and runc-systemd-cgroup is only supported for \"io.containerd.runc.v2\" runtime") 312 } 313 314 if context.String("runtime") == "io.containerd.runc.v2" { 315 return getRuncOptions(context) 316 } 317 318 if configPath := context.String("runtime-config-path"); configPath != "" { 319 return &runtimeoptions.Options{ 320 ConfigPath: configPath, 321 }, nil 322 } 323 324 return nil, nil 325} 326 327func getNewTaskOpts(context *cli.Context) []containerd.NewTaskOpts { 328 var ( 329 tOpts []containerd.NewTaskOpts 330 ) 331 if context.Bool("no-pivot") { 332 tOpts = append(tOpts, containerd.WithNoPivotRoot) 333 } 334 if uidmap := context.String("uidmap"); uidmap != "" { 335 uidMap, err := parseIDMapping(uidmap) 336 if err != nil { 337 logrus.WithError(err).Warn("unable to parse uidmap; defaulting to uid 0 IO ownership") 338 } 339 tOpts = append(tOpts, containerd.WithUIDOwner(uidMap.HostID)) 340 } 341 if gidmap := context.String("gidmap"); gidmap != "" { 342 gidMap, err := parseIDMapping(gidmap) 343 if err != nil { 344 logrus.WithError(err).Warn("unable to parse gidmap; defaulting to gid 0 IO ownership") 345 } 346 tOpts = append(tOpts, containerd.WithGIDOwner(gidMap.HostID)) 347 } 348 return tOpts 349} 350 351func parseIDMapping(mapping string) (specs.LinuxIDMapping, error) { 352 parts := strings.Split(mapping, ":") 353 if len(parts) != 3 { 354 return specs.LinuxIDMapping{}, errors.New("user namespace mappings require the format `container-id:host-id:size`") 355 } 356 cID, err := strconv.ParseUint(parts[0], 0, 32) 357 if err != nil { 358 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid container id for user namespace remapping") 359 } 360 hID, err := strconv.ParseUint(parts[1], 0, 32) 361 if err != nil { 362 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid host id for user namespace remapping") 363 } 364 size, err := strconv.ParseUint(parts[2], 0, 32) 365 if err != nil { 366 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid size for user namespace remapping") 367 } 368 return specs.LinuxIDMapping{ 369 ContainerID: uint32(cID), 370 HostID: uint32(hID), 371 Size: uint32(size), 372 }, nil 373} 374 375func validNamespace(ns string) bool { 376 linuxNs := specs.LinuxNamespaceType(ns) 377 switch linuxNs { 378 case specs.PIDNamespace, 379 specs.NetworkNamespace, 380 specs.UTSNamespace, 381 specs.MountNamespace, 382 specs.UserNamespace, 383 specs.IPCNamespace, 384 specs.CgroupNamespace: 385 return true 386 default: 387 return false 388 } 389} 390