1// +build !windows
2
3/*
4   Copyright The containerd Authors.
5
6   Licensed under the Apache License, Version 2.0 (the "License");
7   you may not use this file except in compliance with the License.
8   You may obtain a copy of the License at
9
10       http://www.apache.org/licenses/LICENSE-2.0
11
12   Unless required by applicable law or agreed to in writing, software
13   distributed under the License is distributed on an "AS IS" BASIS,
14   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   See the License for the specific language governing permissions and
16   limitations under the License.
17*/
18
19package run
20
21import (
22	gocontext "context"
23	"fmt"
24	"path/filepath"
25	"strconv"
26	"strings"
27
28	"github.com/containerd/containerd"
29	"github.com/containerd/containerd/cmd/ctr/commands"
30	"github.com/containerd/containerd/contrib/apparmor"
31	"github.com/containerd/containerd/contrib/nvidia"
32	"github.com/containerd/containerd/contrib/seccomp"
33	"github.com/containerd/containerd/oci"
34	runtimeoptions "github.com/containerd/containerd/pkg/runtimeoptions/v1"
35	"github.com/containerd/containerd/platforms"
36	"github.com/containerd/containerd/runtime/v2/runc/options"
37	"github.com/opencontainers/runtime-spec/specs-go"
38	"github.com/pkg/errors"
39	"github.com/sirupsen/logrus"
40	"github.com/urfave/cli"
41)
42
43var platformRunFlags = []cli.Flag{
44	cli.StringFlag{
45		Name:  "runc-binary",
46		Usage: "specify runc-compatible binary",
47	},
48	cli.StringFlag{
49		Name:  "runc-root",
50		Usage: "specify runc-compatible root",
51	},
52	cli.BoolFlag{
53		Name:  "runc-systemd-cgroup",
54		Usage: "start runc with systemd cgroup manager",
55	},
56	cli.StringFlag{
57		Name:  "uidmap",
58		Usage: "run inside a user namespace with the specified UID mapping range; specified with the format `container-uid:host-uid:length`",
59	},
60	cli.StringFlag{
61		Name:  "gidmap",
62		Usage: "run inside a user namespace with the specified GID mapping range; specified with the format `container-gid:host-gid:length`",
63	},
64	cli.BoolFlag{
65		Name:  "remap-labels",
66		Usage: "provide the user namespace ID remapping to the snapshotter via label options; requires snapshotter support",
67	},
68	cli.Float64Flag{
69		Name:  "cpus",
70		Usage: "set the CFS cpu quota",
71		Value: 0.0,
72	},
73	cli.BoolFlag{
74		Name:  "cni",
75		Usage: "enable cni networking for the container",
76	},
77}
78
79// NewContainer creates a new container
80func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli.Context) (containerd.Container, error) {
81	var (
82		id     string
83		config = context.IsSet("config")
84	)
85	if config {
86		id = context.Args().First()
87	} else {
88		id = context.Args().Get(1)
89	}
90
91	var (
92		opts  []oci.SpecOpts
93		cOpts []containerd.NewContainerOpts
94		spec  containerd.NewContainerOpts
95	)
96
97	cOpts = append(cOpts, containerd.WithContainerLabels(commands.LabelArgs(context.StringSlice("label"))))
98	if config {
99		opts = append(opts, oci.WithSpecFromFile(context.String("config")))
100	} else {
101		var (
102			ref = context.Args().First()
103			//for container's id is Args[1]
104			args = context.Args()[2:]
105		)
106		opts = append(opts, oci.WithDefaultSpec(), oci.WithDefaultUnixDevices)
107		if ef := context.String("env-file"); ef != "" {
108			opts = append(opts, oci.WithEnvFile(ef))
109		}
110		opts = append(opts, oci.WithEnv(context.StringSlice("env")))
111		opts = append(opts, withMounts(context))
112
113		if context.Bool("rootfs") {
114			rootfs, err := filepath.Abs(ref)
115			if err != nil {
116				return nil, err
117			}
118			opts = append(opts, oci.WithRootFSPath(rootfs))
119		} else {
120			snapshotter := context.String("snapshotter")
121			var image containerd.Image
122			i, err := client.ImageService().Get(ctx, ref)
123			if err != nil {
124				return nil, err
125			}
126			if ps := context.String("platform"); ps != "" {
127				platform, err := platforms.Parse(ps)
128				if err != nil {
129					return nil, err
130				}
131				image = containerd.NewImageWithPlatform(client, i, platforms.Only(platform))
132			} else {
133				image = containerd.NewImage(client, i)
134			}
135
136			unpacked, err := image.IsUnpacked(ctx, snapshotter)
137			if err != nil {
138				return nil, err
139			}
140			if !unpacked {
141				if err := image.Unpack(ctx, snapshotter); err != nil {
142					return nil, err
143				}
144			}
145			opts = append(opts, oci.WithImageConfig(image))
146			cOpts = append(cOpts,
147				containerd.WithImage(image),
148				containerd.WithSnapshotter(snapshotter))
149			if uidmap, gidmap := context.String("uidmap"), context.String("gidmap"); uidmap != "" && gidmap != "" {
150				uidMap, err := parseIDMapping(uidmap)
151				if err != nil {
152					return nil, err
153				}
154				gidMap, err := parseIDMapping(gidmap)
155				if err != nil {
156					return nil, err
157				}
158				opts = append(opts,
159					oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap}))
160				// use snapshotter opts or the remapped snapshot support to shift the filesystem
161				// currently the only snapshotter known to support the labels is fuse-overlayfs:
162				// https://github.com/AkihiroSuda/containerd-fuse-overlayfs
163				if context.Bool("remap-labels") {
164					cOpts = append(cOpts, containerd.WithNewSnapshot(id, image,
165						containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size)))
166				} else {
167					cOpts = append(cOpts, containerd.WithRemappedSnapshot(id, image, uidMap.HostID, gidMap.HostID))
168				}
169			} else {
170				// Even when "read-only" is set, we don't use KindView snapshot here. (#1495)
171				// We pass writable snapshot to the OCI runtime, and the runtime remounts it as read-only,
172				// after creating some mount points on demand.
173				cOpts = append(cOpts, containerd.WithNewSnapshot(id, image))
174			}
175			cOpts = append(cOpts, containerd.WithImageStopSignal(image, "SIGTERM"))
176		}
177		if context.Bool("read-only") {
178			opts = append(opts, oci.WithRootFSReadonly())
179		}
180		if len(args) > 0 {
181			opts = append(opts, oci.WithProcessArgs(args...))
182		}
183		if cwd := context.String("cwd"); cwd != "" {
184			opts = append(opts, oci.WithProcessCwd(cwd))
185		}
186		if context.Bool("tty") {
187			opts = append(opts, oci.WithTTY)
188		}
189		if context.Bool("privileged") {
190			opts = append(opts, oci.WithPrivileged, oci.WithAllDevicesAllowed, oci.WithHostDevices)
191		}
192		if context.Bool("net-host") {
193			opts = append(opts, oci.WithHostNamespace(specs.NetworkNamespace), oci.WithHostHostsFile, oci.WithHostResolvconf)
194		}
195
196		seccompProfile := context.String("seccomp-profile")
197
198		if !context.Bool("seccomp") && seccompProfile != "" {
199			return nil, fmt.Errorf("seccomp must be set to true, if using a custom seccomp-profile")
200		}
201
202		if context.Bool("seccomp") {
203			if seccompProfile != "" {
204				opts = append(opts, seccomp.WithProfile(seccompProfile))
205			} else {
206				opts = append(opts, seccomp.WithDefaultProfile())
207			}
208		}
209
210		if s := context.String("apparmor-default-profile"); len(s) > 0 {
211			opts = append(opts, apparmor.WithDefaultProfile(s))
212		}
213
214		if s := context.String("apparmor-profile"); len(s) > 0 {
215			if len(context.String("apparmor-default-profile")) > 0 {
216				return nil, fmt.Errorf("apparmor-profile conflicts with apparmor-default-profile")
217			}
218			opts = append(opts, apparmor.WithProfile(s))
219		}
220
221		if cpus := context.Float64("cpus"); cpus > 0.0 {
222			var (
223				period = uint64(100000)
224				quota  = int64(cpus * 100000.0)
225			)
226			opts = append(opts, oci.WithCPUCFS(quota, period))
227		}
228
229		quota := context.Int64("cpu-quota")
230		period := context.Uint64("cpu-period")
231		if quota != -1 || period != 0 {
232			if cpus := context.Float64("cpus"); cpus > 0.0 {
233				return nil, errors.New("cpus and quota/period should be used separately")
234			}
235			opts = append(opts, oci.WithCPUCFS(quota, period))
236		}
237
238		joinNs := context.StringSlice("with-ns")
239		for _, ns := range joinNs {
240			parts := strings.Split(ns, ":")
241			if len(parts) != 2 {
242				return nil, errors.New("joining a Linux namespace using --with-ns requires the format 'nstype:path'")
243			}
244			if !validNamespace(parts[0]) {
245				return nil, errors.New("the Linux namespace type specified in --with-ns is not valid: " + parts[0])
246			}
247			opts = append(opts, oci.WithLinuxNamespace(specs.LinuxNamespace{
248				Type: specs.LinuxNamespaceType(parts[0]),
249				Path: parts[1],
250			}))
251		}
252		if context.IsSet("gpus") {
253			opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities))
254		}
255		if context.IsSet("allow-new-privs") {
256			opts = append(opts, oci.WithNewPrivileges)
257		}
258		if context.IsSet("cgroup") {
259			// NOTE: can be set to "" explicitly for disabling cgroup.
260			opts = append(opts, oci.WithCgroup(context.String("cgroup")))
261		}
262		limit := context.Uint64("memory-limit")
263		if limit != 0 {
264			opts = append(opts, oci.WithMemoryLimit(limit))
265		}
266		for _, dev := range context.StringSlice("device") {
267			opts = append(opts, oci.WithLinuxDevice(dev, "rwm"))
268		}
269	}
270
271	runtimeOpts, err := getRuntimeOptions(context)
272	if err != nil {
273		return nil, err
274	}
275	cOpts = append(cOpts, containerd.WithRuntime(context.String("runtime"), runtimeOpts))
276
277	opts = append(opts, oci.WithAnnotations(commands.LabelArgs(context.StringSlice("label"))))
278	var s specs.Spec
279	spec = containerd.WithSpec(&s, opts...)
280
281	cOpts = append(cOpts, spec)
282
283	// oci.WithImageConfig (WithUsername, WithUserID) depends on access to rootfs for resolving via
284	// the /etc/{passwd,group} files. So cOpts needs to have precedence over opts.
285	return client.NewContainer(ctx, id, cOpts...)
286}
287
288func getRuncOptions(context *cli.Context) (*options.Options, error) {
289	runtimeOpts := &options.Options{}
290	if runcBinary := context.String("runc-binary"); runcBinary != "" {
291		runtimeOpts.BinaryName = runcBinary
292	}
293	if context.Bool("runc-systemd-cgroup") {
294		if context.String("cgroup") == "" {
295			// runc maps "machine.slice:foo:deadbeef" to "/machine.slice/foo-deadbeef.scope"
296			return nil, errors.New("option --runc-systemd-cgroup requires --cgroup to be set, e.g. \"machine.slice:foo:deadbeef\"")
297		}
298		runtimeOpts.SystemdCgroup = true
299	}
300	if root := context.String("runc-root"); root != "" {
301		runtimeOpts.Root = root
302	}
303
304	return runtimeOpts, nil
305}
306
307func getRuntimeOptions(context *cli.Context) (interface{}, error) {
308	// validate first
309	if (context.String("runc-binary") != "" || context.Bool("runc-systemd-cgroup")) &&
310		context.String("runtime") != "io.containerd.runc.v2" {
311		return nil, errors.New("specifying runc-binary and runc-systemd-cgroup is only supported for \"io.containerd.runc.v2\" runtime")
312	}
313
314	if context.String("runtime") == "io.containerd.runc.v2" {
315		return getRuncOptions(context)
316	}
317
318	if configPath := context.String("runtime-config-path"); configPath != "" {
319		return &runtimeoptions.Options{
320			ConfigPath: configPath,
321		}, nil
322	}
323
324	return nil, nil
325}
326
327func getNewTaskOpts(context *cli.Context) []containerd.NewTaskOpts {
328	var (
329		tOpts []containerd.NewTaskOpts
330	)
331	if context.Bool("no-pivot") {
332		tOpts = append(tOpts, containerd.WithNoPivotRoot)
333	}
334	if uidmap := context.String("uidmap"); uidmap != "" {
335		uidMap, err := parseIDMapping(uidmap)
336		if err != nil {
337			logrus.WithError(err).Warn("unable to parse uidmap; defaulting to uid 0 IO ownership")
338		}
339		tOpts = append(tOpts, containerd.WithUIDOwner(uidMap.HostID))
340	}
341	if gidmap := context.String("gidmap"); gidmap != "" {
342		gidMap, err := parseIDMapping(gidmap)
343		if err != nil {
344			logrus.WithError(err).Warn("unable to parse gidmap; defaulting to gid 0 IO ownership")
345		}
346		tOpts = append(tOpts, containerd.WithGIDOwner(gidMap.HostID))
347	}
348	return tOpts
349}
350
351func parseIDMapping(mapping string) (specs.LinuxIDMapping, error) {
352	parts := strings.Split(mapping, ":")
353	if len(parts) != 3 {
354		return specs.LinuxIDMapping{}, errors.New("user namespace mappings require the format `container-id:host-id:size`")
355	}
356	cID, err := strconv.ParseUint(parts[0], 0, 32)
357	if err != nil {
358		return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid container id for user namespace remapping")
359	}
360	hID, err := strconv.ParseUint(parts[1], 0, 32)
361	if err != nil {
362		return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid host id for user namespace remapping")
363	}
364	size, err := strconv.ParseUint(parts[2], 0, 32)
365	if err != nil {
366		return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid size for user namespace remapping")
367	}
368	return specs.LinuxIDMapping{
369		ContainerID: uint32(cID),
370		HostID:      uint32(hID),
371		Size:        uint32(size),
372	}, nil
373}
374
375func validNamespace(ns string) bool {
376	linuxNs := specs.LinuxNamespaceType(ns)
377	switch linuxNs {
378	case specs.PIDNamespace,
379		specs.NetworkNamespace,
380		specs.UTSNamespace,
381		specs.MountNamespace,
382		specs.UserNamespace,
383		specs.IPCNamespace,
384		specs.CgroupNamespace:
385		return true
386	default:
387		return false
388	}
389}
390