1/*
2   Copyright The containerd Authors.
3
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15*/
16
17package oci
18
19import (
20	"bufio"
21	"context"
22	"encoding/json"
23	"fmt"
24	"io/ioutil"
25	"os"
26	"path/filepath"
27	"strconv"
28	"strings"
29
30	"github.com/containerd/containerd/containers"
31	"github.com/containerd/containerd/content"
32	"github.com/containerd/containerd/images"
33	"github.com/containerd/containerd/mount"
34	"github.com/containerd/containerd/namespaces"
35	"github.com/containerd/containerd/platforms"
36	"github.com/containerd/continuity/fs"
37	v1 "github.com/opencontainers/image-spec/specs-go/v1"
38	"github.com/opencontainers/runc/libcontainer/user"
39	specs "github.com/opencontainers/runtime-spec/specs-go"
40	"github.com/pkg/errors"
41	"github.com/syndtr/gocapability/capability"
42)
43
// SpecOpts sets spec specific information to a newly generated OCI spec.
//
// An option receives a context, a Client, a container record and the spec
// to modify, and reports failure through its error result.
type SpecOpts func(context.Context, Client, *containers.Container, *Spec) error
46
47// Compose converts a sequence of spec operations into a single operation
48func Compose(opts ...SpecOpts) SpecOpts {
49	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
50		for _, o := range opts {
51			if err := o(ctx, client, c, s); err != nil {
52				return err
53			}
54		}
55		return nil
56	}
57}
58
59// setProcess sets Process to empty if unset
60func setProcess(s *Spec) {
61	if s.Process == nil {
62		s.Process = &specs.Process{}
63	}
64}
65
66// setRoot sets Root to empty if unset
67func setRoot(s *Spec) {
68	if s.Root == nil {
69		s.Root = &specs.Root{}
70	}
71}
72
73// setLinux sets Linux to empty if unset
74func setLinux(s *Spec) {
75	if s.Linux == nil {
76		s.Linux = &specs.Linux{}
77	}
78}
79
80// nolint
81func setResources(s *Spec) {
82	if s.Linux != nil {
83		if s.Linux.Resources == nil {
84			s.Linux.Resources = &specs.LinuxResources{}
85		}
86	}
87	if s.Windows != nil {
88		if s.Windows.Resources == nil {
89			s.Windows.Resources = &specs.WindowsResources{}
90		}
91	}
92}
93
94// nolint
95func setCPU(s *Spec) {
96	setResources(s)
97	if s.Linux != nil {
98		if s.Linux.Resources.CPU == nil {
99			s.Linux.Resources.CPU = &specs.LinuxCPU{}
100		}
101	}
102	if s.Windows != nil {
103		if s.Windows.Resources.CPU == nil {
104			s.Windows.Resources.CPU = &specs.WindowsCPUResources{}
105		}
106	}
107}
108
109// setCapabilities sets Linux Capabilities to empty if unset
110func setCapabilities(s *Spec) {
111	setProcess(s)
112	if s.Process.Capabilities == nil {
113		s.Process.Capabilities = &specs.LinuxCapabilities{}
114	}
115}
116
117// WithDefaultSpec returns a SpecOpts that will populate the spec with default
118// values.
119//
120// Use as the first option to clear the spec, then apply options afterwards.
121func WithDefaultSpec() SpecOpts {
122	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
123		return generateDefaultSpecWithPlatform(ctx, platforms.DefaultString(), c.ID, s)
124	}
125}
126
127// WithDefaultSpecForPlatform returns a SpecOpts that will populate the spec
128// with default values for a given platform.
129//
130// Use as the first option to clear the spec, then apply options afterwards.
131func WithDefaultSpecForPlatform(platform string) SpecOpts {
132	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
133		return generateDefaultSpecWithPlatform(ctx, platform, c.ID, s)
134	}
135}
136
137// WithSpecFromBytes loads the spec from the provided byte slice.
138func WithSpecFromBytes(p []byte) SpecOpts {
139	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
140		*s = Spec{} // make sure spec is cleared.
141		if err := json.Unmarshal(p, s); err != nil {
142			return errors.Wrapf(err, "decoding spec config file failed, current supported OCI runtime-spec : v%s", specs.Version)
143		}
144		return nil
145	}
146}
147
148// WithSpecFromFile loads the specification from the provided filename.
149func WithSpecFromFile(filename string) SpecOpts {
150	return func(ctx context.Context, c Client, container *containers.Container, s *Spec) error {
151		p, err := ioutil.ReadFile(filename)
152		if err != nil {
153			return errors.Wrap(err, "cannot load spec config file")
154		}
155		return WithSpecFromBytes(p)(ctx, c, container, s)
156	}
157}
158
159// WithEnv appends environment variables
160func WithEnv(environmentVariables []string) SpecOpts {
161	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
162		if len(environmentVariables) > 0 {
163			setProcess(s)
164			s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, environmentVariables)
165		}
166		return nil
167	}
168}
169
170// WithDefaultPathEnv sets the $PATH environment variable to the
171// default PATH defined in this package.
172func WithDefaultPathEnv(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
173	s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, defaultUnixEnv)
174	return nil
175}
176
// replaceOrAppendEnvValues returns the defaults with the overrides either
// replaced by env key or appended to the list.
//
// Each override of the form "KEY=value" replaces the entry with the same key
// or, when there is none, is appended. An override without "=" removes the
// entry with that key. Overrides are applied in order, so a later override
// wins over an earlier one with the same key, including one that was itself
// appended by this call. The result is always a freshly allocated slice;
// neither input is modified.
func replaceOrAppendEnvValues(defaults, overrides []string) []string {
	// cache maps an env key to the index of its entry in results.
	cache := make(map[string]int, len(defaults))
	results := make([]string, 0, len(defaults)+len(overrides))
	for i, e := range defaults {
		parts := strings.SplitN(e, "=", 2)
		results = append(results, e)
		cache[parts[0]] = i
	}

	for _, value := range overrides {
		// Values w/o = means they want this env to be removed/unset.
		if !strings.Contains(value, "=") {
			if i, exists := cache[value]; exists {
				results[i] = "" // Used to indicate it should be removed
			}
			continue
		}

		// Just do a normal set/update
		parts := strings.SplitN(value, "=", 2)
		if i, exists := cache[parts[0]]; exists {
			results[i] = value
		} else {
			// Remember where the new key lands so that a later override of
			// the same key replaces or unsets it instead of duplicating it.
			cache[parts[0]] = len(results)
			results = append(results, value)
		}
	}

	// Now remove all entries that we want to "unset", compacting in place.
	kept := results[:0]
	for _, e := range results {
		if e != "" {
			kept = append(kept, e)
		}
	}

	return kept
}
216
217// WithProcessArgs replaces the args on the generated spec
218func WithProcessArgs(args ...string) SpecOpts {
219	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
220		setProcess(s)
221		s.Process.Args = args
222		return nil
223	}
224}
225
226// WithProcessCwd replaces the current working directory on the generated spec
227func WithProcessCwd(cwd string) SpecOpts {
228	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
229		setProcess(s)
230		s.Process.Cwd = cwd
231		return nil
232	}
233}
234
235// WithTTY sets the information on the spec as well as the environment variables for
236// using a TTY
237func WithTTY(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
238	setProcess(s)
239	s.Process.Terminal = true
240	if s.Linux != nil {
241		s.Process.Env = append(s.Process.Env, "TERM=xterm")
242	}
243
244	return nil
245}
246
247// WithTTYSize sets the information on the spec as well as the environment variables for
248// using a TTY
249func WithTTYSize(width, height int) SpecOpts {
250	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
251		setProcess(s)
252		if s.Process.ConsoleSize == nil {
253			s.Process.ConsoleSize = &specs.Box{}
254		}
255		s.Process.ConsoleSize.Width = uint(width)
256		s.Process.ConsoleSize.Height = uint(height)
257		return nil
258	}
259}
260
261// WithHostname sets the container's hostname
262func WithHostname(name string) SpecOpts {
263	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
264		s.Hostname = name
265		return nil
266	}
267}
268
269// WithMounts appends mounts
270func WithMounts(mounts []specs.Mount) SpecOpts {
271	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
272		s.Mounts = append(s.Mounts, mounts...)
273		return nil
274	}
275}
276
277// WithHostNamespace allows a task to run inside the host's linux namespace
278func WithHostNamespace(ns specs.LinuxNamespaceType) SpecOpts {
279	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
280		setLinux(s)
281		for i, n := range s.Linux.Namespaces {
282			if n.Type == ns {
283				s.Linux.Namespaces = append(s.Linux.Namespaces[:i], s.Linux.Namespaces[i+1:]...)
284				return nil
285			}
286		}
287		return nil
288	}
289}
290
291// WithLinuxNamespace uses the passed in namespace for the spec. If a namespace of the same type already exists in the
292// spec, the existing namespace is replaced by the one provided.
293func WithLinuxNamespace(ns specs.LinuxNamespace) SpecOpts {
294	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
295		setLinux(s)
296		for i, n := range s.Linux.Namespaces {
297			if n.Type == ns.Type {
298				before := s.Linux.Namespaces[:i]
299				after := s.Linux.Namespaces[i+1:]
300				s.Linux.Namespaces = append(before, ns)
301				s.Linux.Namespaces = append(s.Linux.Namespaces, after...)
302				return nil
303			}
304		}
305		s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
306		return nil
307	}
308}
309
310// WithNewPrivileges turns off the NoNewPrivileges feature flag in the spec
311func WithNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
312	setProcess(s)
313	s.Process.NoNewPrivileges = false
314
315	return nil
316}
317
318// WithImageConfig configures the spec to from the configuration of an Image
319func WithImageConfig(image Image) SpecOpts {
320	return WithImageConfigArgs(image, nil)
321}
322
323// WithImageConfigArgs configures the spec to from the configuration of an Image with additional args that
324// replaces the CMD of the image
325func WithImageConfigArgs(image Image, args []string) SpecOpts {
326	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
327		ic, err := image.Config(ctx)
328		if err != nil {
329			return err
330		}
331		var (
332			ociimage v1.Image
333			config   v1.ImageConfig
334		)
335		switch ic.MediaType {
336		case v1.MediaTypeImageConfig, images.MediaTypeDockerSchema2Config:
337			p, err := content.ReadBlob(ctx, image.ContentStore(), ic)
338			if err != nil {
339				return err
340			}
341
342			if err := json.Unmarshal(p, &ociimage); err != nil {
343				return err
344			}
345			config = ociimage.Config
346		default:
347			return fmt.Errorf("unknown image config media type %s", ic.MediaType)
348		}
349
350		setProcess(s)
351		if s.Linux != nil {
352			defaults := config.Env
353			if len(defaults) == 0 {
354				defaults = defaultUnixEnv
355			}
356			s.Process.Env = replaceOrAppendEnvValues(defaults, s.Process.Env)
357			cmd := config.Cmd
358			if len(args) > 0 {
359				cmd = args
360			}
361			s.Process.Args = append(config.Entrypoint, cmd...)
362
363			cwd := config.WorkingDir
364			if cwd == "" {
365				cwd = "/"
366			}
367			s.Process.Cwd = cwd
368			if config.User != "" {
369				if err := WithUser(config.User)(ctx, client, c, s); err != nil {
370					return err
371				}
372				return WithAdditionalGIDs(fmt.Sprintf("%d", s.Process.User.UID))(ctx, client, c, s)
373			}
374			// we should query the image's /etc/group for additional GIDs
375			// even if there is no specified user in the image config
376			return WithAdditionalGIDs("root")(ctx, client, c, s)
377		} else if s.Windows != nil {
378			s.Process.Env = replaceOrAppendEnvValues(config.Env, s.Process.Env)
379			cmd := config.Cmd
380			if len(args) > 0 {
381				cmd = args
382			}
383			s.Process.Args = append(config.Entrypoint, cmd...)
384
385			s.Process.Cwd = config.WorkingDir
386			s.Process.User = specs.User{
387				Username: config.User,
388			}
389		} else {
390			return errors.New("spec does not contain Linux or Windows section")
391		}
392		return nil
393	}
394}
395
396// WithRootFSPath specifies unmanaged rootfs path.
397func WithRootFSPath(path string) SpecOpts {
398	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
399		setRoot(s)
400		s.Root.Path = path
401		// Entrypoint is not set here (it's up to caller)
402		return nil
403	}
404}
405
406// WithRootFSReadonly sets specs.Root.Readonly to true
407func WithRootFSReadonly() SpecOpts {
408	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
409		setRoot(s)
410		s.Root.Readonly = true
411		return nil
412	}
413}
414
415// WithNoNewPrivileges sets no_new_privileges on the process for the container
416func WithNoNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
417	setProcess(s)
418	s.Process.NoNewPrivileges = true
419	return nil
420}
421
422// WithHostHostsFile bind-mounts the host's /etc/hosts into the container as readonly
423func WithHostHostsFile(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
424	s.Mounts = append(s.Mounts, specs.Mount{
425		Destination: "/etc/hosts",
426		Type:        "bind",
427		Source:      "/etc/hosts",
428		Options:     []string{"rbind", "ro"},
429	})
430	return nil
431}
432
433// WithHostResolvconf bind-mounts the host's /etc/resolv.conf into the container as readonly
434func WithHostResolvconf(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
435	s.Mounts = append(s.Mounts, specs.Mount{
436		Destination: "/etc/resolv.conf",
437		Type:        "bind",
438		Source:      "/etc/resolv.conf",
439		Options:     []string{"rbind", "ro"},
440	})
441	return nil
442}
443
444// WithHostLocaltime bind-mounts the host's /etc/localtime into the container as readonly
445func WithHostLocaltime(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
446	s.Mounts = append(s.Mounts, specs.Mount{
447		Destination: "/etc/localtime",
448		Type:        "bind",
449		Source:      "/etc/localtime",
450		Options:     []string{"rbind", "ro"},
451	})
452	return nil
453}
454
455// WithUserNamespace sets the uid and gid mappings for the task
456// this can be called multiple times to add more mappings to the generated spec
457func WithUserNamespace(uidMap, gidMap []specs.LinuxIDMapping) SpecOpts {
458	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
459		var hasUserns bool
460		setLinux(s)
461		for _, ns := range s.Linux.Namespaces {
462			if ns.Type == specs.UserNamespace {
463				hasUserns = true
464				break
465			}
466		}
467		if !hasUserns {
468			s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{
469				Type: specs.UserNamespace,
470			})
471		}
472		s.Linux.UIDMappings = append(s.Linux.UIDMappings, uidMap...)
473		s.Linux.GIDMappings = append(s.Linux.GIDMappings, gidMap...)
474		return nil
475	}
476}
477
478// WithCgroup sets the container's cgroup path
479func WithCgroup(path string) SpecOpts {
480	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
481		setLinux(s)
482		s.Linux.CgroupsPath = path
483		return nil
484	}
485}
486
487// WithNamespacedCgroup uses the namespace set on the context to create a
488// root directory for containers in the cgroup with the id as the subcgroup
489func WithNamespacedCgroup() SpecOpts {
490	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
491		namespace, err := namespaces.NamespaceRequired(ctx)
492		if err != nil {
493			return err
494		}
495		setLinux(s)
496		s.Linux.CgroupsPath = filepath.Join("/", namespace, c.ID)
497		return nil
498	}
499}
500
501// WithUser sets the user to be used within the container.
502// It accepts a valid user string in OCI Image Spec v1.0.0:
503//   user, uid, user:group, uid:gid, uid:group, user:gid
504func WithUser(userstr string) SpecOpts {
505	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
506		setProcess(s)
507		parts := strings.Split(userstr, ":")
508		switch len(parts) {
509		case 1:
510			v, err := strconv.Atoi(parts[0])
511			if err != nil {
512				// if we cannot parse as a uint they try to see if it is a username
513				return WithUsername(userstr)(ctx, client, c, s)
514			}
515			return WithUserID(uint32(v))(ctx, client, c, s)
516		case 2:
517			var (
518				username  string
519				groupname string
520			)
521			var uid, gid uint32
522			v, err := strconv.Atoi(parts[0])
523			if err != nil {
524				username = parts[0]
525			} else {
526				uid = uint32(v)
527			}
528			if v, err = strconv.Atoi(parts[1]); err != nil {
529				groupname = parts[1]
530			} else {
531				gid = uint32(v)
532			}
533			if username == "" && groupname == "" {
534				s.Process.User.UID, s.Process.User.GID = uid, gid
535				return nil
536			}
537			f := func(root string) error {
538				if username != "" {
539					user, err := getUserFromPath(root, func(u user.User) bool {
540						return u.Name == username
541					})
542					if err != nil {
543						return err
544					}
545					uid = uint32(user.Uid)
546				}
547				if groupname != "" {
548					gid, err = getGIDFromPath(root, func(g user.Group) bool {
549						return g.Name == groupname
550					})
551					if err != nil {
552						return err
553					}
554				}
555				s.Process.User.UID, s.Process.User.GID = uid, gid
556				return nil
557			}
558			if c.Snapshotter == "" && c.SnapshotKey == "" {
559				if !isRootfsAbs(s.Root.Path) {
560					return errors.New("rootfs absolute path is required")
561				}
562				return f(s.Root.Path)
563			}
564			if c.Snapshotter == "" {
565				return errors.New("no snapshotter set for container")
566			}
567			if c.SnapshotKey == "" {
568				return errors.New("rootfs snapshot not created for container")
569			}
570			snapshotter := client.SnapshotService(c.Snapshotter)
571			mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
572			if err != nil {
573				return err
574			}
575			return mount.WithTempMount(ctx, mounts, f)
576		default:
577			return fmt.Errorf("invalid USER value %s", userstr)
578		}
579	}
580}
581
582// WithUIDGID allows the UID and GID for the Process to be set
583func WithUIDGID(uid, gid uint32) SpecOpts {
584	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
585		setProcess(s)
586		s.Process.User.UID = uid
587		s.Process.User.GID = gid
588		return nil
589	}
590}
591
592// WithUserID sets the correct UID and GID for the container based
593// on the image's /etc/passwd contents. If /etc/passwd does not exist,
594// or uid is not found in /etc/passwd, it sets the requested uid,
595// additionally sets the gid to 0, and does not return an error.
596func WithUserID(uid uint32) SpecOpts {
597	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
598		setProcess(s)
599		if c.Snapshotter == "" && c.SnapshotKey == "" {
600			if !isRootfsAbs(s.Root.Path) {
601				return errors.Errorf("rootfs absolute path is required")
602			}
603			user, err := getUserFromPath(s.Root.Path, func(u user.User) bool {
604				return u.Uid == int(uid)
605			})
606			if err != nil {
607				if os.IsNotExist(err) || err == errNoUsersFound {
608					s.Process.User.UID, s.Process.User.GID = uid, 0
609					return nil
610				}
611				return err
612			}
613			s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
614			return nil
615
616		}
617		if c.Snapshotter == "" {
618			return errors.Errorf("no snapshotter set for container")
619		}
620		if c.SnapshotKey == "" {
621			return errors.Errorf("rootfs snapshot not created for container")
622		}
623		snapshotter := client.SnapshotService(c.Snapshotter)
624		mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
625		if err != nil {
626			return err
627		}
628		return mount.WithTempMount(ctx, mounts, func(root string) error {
629			user, err := getUserFromPath(root, func(u user.User) bool {
630				return u.Uid == int(uid)
631			})
632			if err != nil {
633				if os.IsNotExist(err) || err == errNoUsersFound {
634					s.Process.User.UID, s.Process.User.GID = uid, 0
635					return nil
636				}
637				return err
638			}
639			s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
640			return nil
641		})
642	}
643}
644
645// WithUsername sets the correct UID and GID for the container
646// based on the image's /etc/passwd contents. If /etc/passwd
647// does not exist, or the username is not found in /etc/passwd,
648// it returns error.
649func WithUsername(username string) SpecOpts {
650	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
651		setProcess(s)
652		if s.Linux != nil {
653			if c.Snapshotter == "" && c.SnapshotKey == "" {
654				if !isRootfsAbs(s.Root.Path) {
655					return errors.Errorf("rootfs absolute path is required")
656				}
657				user, err := getUserFromPath(s.Root.Path, func(u user.User) bool {
658					return u.Name == username
659				})
660				if err != nil {
661					return err
662				}
663				s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
664				return nil
665			}
666			if c.Snapshotter == "" {
667				return errors.Errorf("no snapshotter set for container")
668			}
669			if c.SnapshotKey == "" {
670				return errors.Errorf("rootfs snapshot not created for container")
671			}
672			snapshotter := client.SnapshotService(c.Snapshotter)
673			mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
674			if err != nil {
675				return err
676			}
677			return mount.WithTempMount(ctx, mounts, func(root string) error {
678				user, err := getUserFromPath(root, func(u user.User) bool {
679					return u.Name == username
680				})
681				if err != nil {
682					return err
683				}
684				s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
685				return nil
686			})
687		} else if s.Windows != nil {
688			s.Process.User.Username = username
689		} else {
690			return errors.New("spec does not contain Linux or Windows section")
691		}
692		return nil
693	}
694}
695
696// WithAdditionalGIDs sets the OCI spec's additionalGids array to any additional groups listed
697// for a particular user in the /etc/groups file of the image's root filesystem
698// The passed in user can be either a uid or a username.
699func WithAdditionalGIDs(userstr string) SpecOpts {
700	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
701		// For LCOW additional GID's not supported
702		if s.Windows != nil {
703			return nil
704		}
705		setProcess(s)
706		setAdditionalGids := func(root string) error {
707			var username string
708			uid, err := strconv.Atoi(userstr)
709			if err == nil {
710				user, err := getUserFromPath(root, func(u user.User) bool {
711					return u.Uid == uid
712				})
713				if err != nil {
714					if os.IsNotExist(err) || err == errNoUsersFound {
715						return nil
716					}
717					return err
718				}
719				username = user.Name
720			} else {
721				username = userstr
722			}
723			gids, err := getSupplementalGroupsFromPath(root, func(g user.Group) bool {
724				// we only want supplemental groups
725				if g.Name == username {
726					return false
727				}
728				for _, entry := range g.List {
729					if entry == username {
730						return true
731					}
732				}
733				return false
734			})
735			if err != nil {
736				if os.IsNotExist(err) {
737					return nil
738				}
739				return err
740			}
741			s.Process.User.AdditionalGids = gids
742			return nil
743		}
744		if c.Snapshotter == "" && c.SnapshotKey == "" {
745			if !isRootfsAbs(s.Root.Path) {
746				return errors.Errorf("rootfs absolute path is required")
747			}
748			return setAdditionalGids(s.Root.Path)
749		}
750		if c.Snapshotter == "" {
751			return errors.Errorf("no snapshotter set for container")
752		}
753		if c.SnapshotKey == "" {
754			return errors.Errorf("rootfs snapshot not created for container")
755		}
756		snapshotter := client.SnapshotService(c.Snapshotter)
757		mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
758		if err != nil {
759			return err
760		}
761		return mount.WithTempMount(ctx, mounts, setAdditionalGids)
762	}
763}
764
765// WithCapabilities sets Linux capabilities on the process
766func WithCapabilities(caps []string) SpecOpts {
767	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
768		setCapabilities(s)
769
770		s.Process.Capabilities.Bounding = caps
771		s.Process.Capabilities.Effective = caps
772		s.Process.Capabilities.Permitted = caps
773		s.Process.Capabilities.Inheritable = caps
774
775		return nil
776	}
777}
778
779// WithAllCapabilities sets all linux capabilities for the process
780var WithAllCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
781	return WithCapabilities(GetAllCapabilities())(ctx, client, c, s)
782}
783
784// GetAllCapabilities returns all caps up to CAP_LAST_CAP
785// or CAP_BLOCK_SUSPEND on RHEL6
786func GetAllCapabilities() []string {
787	last := capability.CAP_LAST_CAP
788	// hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap
789	if last == capability.Cap(63) {
790		last = capability.CAP_BLOCK_SUSPEND
791	}
792	var caps []string
793	for _, cap := range capability.List() {
794		if cap > last {
795			continue
796		}
797		caps = append(caps, "CAP_"+strings.ToUpper(cap.String()))
798	}
799	return caps
800}
801
// capsContain reports whether s is an element of caps. The comparison is an
// exact string match.
func capsContain(caps []string, s string) bool {
	for i := 0; i < len(caps); i++ {
		if caps[i] == s {
			return true
		}
	}
	return false
}
810
// removeCap replaces *caps with a new slice holding every element that is
// not equal to s, in the original order. When nothing remains, *caps becomes
// nil.
func removeCap(caps *[]string, s string) {
	var kept []string
	for i := range *caps {
		if name := (*caps)[i]; name != s {
			kept = append(kept, name)
		}
	}
	*caps = kept
}
821
822// WithAddedCapabilities adds the provided capabilities
823func WithAddedCapabilities(caps []string) SpecOpts {
824	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
825		setCapabilities(s)
826		for _, c := range caps {
827			for _, cl := range []*[]string{
828				&s.Process.Capabilities.Bounding,
829				&s.Process.Capabilities.Effective,
830				&s.Process.Capabilities.Permitted,
831				&s.Process.Capabilities.Inheritable,
832			} {
833				if !capsContain(*cl, c) {
834					*cl = append(*cl, c)
835				}
836			}
837		}
838		return nil
839	}
840}
841
842// WithDroppedCapabilities removes the provided capabilities
843func WithDroppedCapabilities(caps []string) SpecOpts {
844	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
845		setCapabilities(s)
846		for _, c := range caps {
847			for _, cl := range []*[]string{
848				&s.Process.Capabilities.Bounding,
849				&s.Process.Capabilities.Effective,
850				&s.Process.Capabilities.Permitted,
851				&s.Process.Capabilities.Inheritable,
852			} {
853				removeCap(cl, c)
854			}
855		}
856		return nil
857	}
858}
859
860// WithAmbientCapabilities set the Linux ambient capabilities for the process
861// Ambient capabilities should only be set for non-root users or the caller should
862// understand how these capabilities are used and set
863func WithAmbientCapabilities(caps []string) SpecOpts {
864	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
865		setCapabilities(s)
866
867		s.Process.Capabilities.Ambient = caps
868		return nil
869	}
870}
871
872var errNoUsersFound = errors.New("no users found")
873
874func getUserFromPath(root string, filter func(user.User) bool) (user.User, error) {
875	ppath, err := fs.RootPath(root, "/etc/passwd")
876	if err != nil {
877		return user.User{}, err
878	}
879	users, err := user.ParsePasswdFileFilter(ppath, filter)
880	if err != nil {
881		return user.User{}, err
882	}
883	if len(users) == 0 {
884		return user.User{}, errNoUsersFound
885	}
886	return users[0], nil
887}
888
889var errNoGroupsFound = errors.New("no groups found")
890
891func getGIDFromPath(root string, filter func(user.Group) bool) (gid uint32, err error) {
892	gpath, err := fs.RootPath(root, "/etc/group")
893	if err != nil {
894		return 0, err
895	}
896	groups, err := user.ParseGroupFileFilter(gpath, filter)
897	if err != nil {
898		return 0, err
899	}
900	if len(groups) == 0 {
901		return 0, errNoGroupsFound
902	}
903	g := groups[0]
904	return uint32(g.Gid), nil
905}
906
907func getSupplementalGroupsFromPath(root string, filter func(user.Group) bool) ([]uint32, error) {
908	gpath, err := fs.RootPath(root, "/etc/group")
909	if err != nil {
910		return []uint32{}, err
911	}
912	groups, err := user.ParseGroupFileFilter(gpath, filter)
913	if err != nil {
914		return []uint32{}, err
915	}
916	if len(groups) == 0 {
917		// if there are no additional groups; just return an empty set
918		return []uint32{}, nil
919	}
920	addlGids := []uint32{}
921	for _, grp := range groups {
922		addlGids = append(addlGids, uint32(grp.Gid))
923	}
924	return addlGids, nil
925}
926
// isRootfsAbs reports whether root is an absolute path according to
// filepath.IsAbs.
func isRootfsAbs(root string) bool {
	abs := filepath.IsAbs(root)
	return abs
}
930
931// WithMaskedPaths sets the masked paths option
932func WithMaskedPaths(paths []string) SpecOpts {
933	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
934		setLinux(s)
935		s.Linux.MaskedPaths = paths
936		return nil
937	}
938}
939
940// WithReadonlyPaths sets the read only paths option
941func WithReadonlyPaths(paths []string) SpecOpts {
942	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
943		setLinux(s)
944		s.Linux.ReadonlyPaths = paths
945		return nil
946	}
947}
948
949// WithWriteableSysfs makes any sysfs mounts writeable
950func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
951	for i, m := range s.Mounts {
952		if m.Type == "sysfs" {
953			var options []string
954			for _, o := range m.Options {
955				if o == "ro" {
956					o = "rw"
957				}
958				options = append(options, o)
959			}
960			s.Mounts[i].Options = options
961		}
962	}
963	return nil
964}
965
966// WithWriteableCgroupfs makes any cgroup mounts writeable
967func WithWriteableCgroupfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
968	for i, m := range s.Mounts {
969		if m.Type == "cgroup" {
970			var options []string
971			for _, o := range m.Options {
972				if o == "ro" {
973					o = "rw"
974				}
975				options = append(options, o)
976			}
977			s.Mounts[i].Options = options
978		}
979	}
980	return nil
981}
982
983// WithSelinuxLabel sets the process SELinux label
984func WithSelinuxLabel(label string) SpecOpts {
985	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
986		setProcess(s)
987		s.Process.SelinuxLabel = label
988		return nil
989	}
990}
991
992// WithApparmorProfile sets the Apparmor profile for the process
993func WithApparmorProfile(profile string) SpecOpts {
994	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
995		setProcess(s)
996		s.Process.ApparmorProfile = profile
997		return nil
998	}
999}
1000
1001// WithSeccompUnconfined clears the seccomp profile
1002func WithSeccompUnconfined(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1003	setLinux(s)
1004	s.Linux.Seccomp = nil
1005	return nil
1006}
1007
1008// WithParentCgroupDevices uses the default cgroup setup to inherit the container's parent cgroup's
1009// allowed and denied devices
1010func WithParentCgroupDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1011	setLinux(s)
1012	if s.Linux.Resources == nil {
1013		s.Linux.Resources = &specs.LinuxResources{}
1014	}
1015	s.Linux.Resources.Devices = nil
1016	return nil
1017}
1018
1019// WithAllDevicesAllowed permits READ WRITE MKNOD on all devices nodes for the container
1020func WithAllDevicesAllowed(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1021	setLinux(s)
1022	if s.Linux.Resources == nil {
1023		s.Linux.Resources = &specs.LinuxResources{}
1024	}
1025	s.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{
1026		{
1027			Allow:  true,
1028			Access: rwm,
1029		},
1030	}
1031	return nil
1032}
1033
1034// WithDefaultUnixDevices adds the default devices for unix such as /dev/null, /dev/random to
1035// the container's resource cgroup spec
1036func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1037	setLinux(s)
1038	if s.Linux.Resources == nil {
1039		s.Linux.Resources = &specs.LinuxResources{}
1040	}
1041	intptr := func(i int64) *int64 {
1042		return &i
1043	}
1044	s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, []specs.LinuxDeviceCgroup{
1045		{
1046			// "/dev/null",
1047			Type:   "c",
1048			Major:  intptr(1),
1049			Minor:  intptr(3),
1050			Access: rwm,
1051			Allow:  true,
1052		},
1053		{
1054			// "/dev/random",
1055			Type:   "c",
1056			Major:  intptr(1),
1057			Minor:  intptr(8),
1058			Access: rwm,
1059			Allow:  true,
1060		},
1061		{
1062			// "/dev/full",
1063			Type:   "c",
1064			Major:  intptr(1),
1065			Minor:  intptr(7),
1066			Access: rwm,
1067			Allow:  true,
1068		},
1069		{
1070			// "/dev/tty",
1071			Type:   "c",
1072			Major:  intptr(5),
1073			Minor:  intptr(0),
1074			Access: rwm,
1075			Allow:  true,
1076		},
1077		{
1078			// "/dev/zero",
1079			Type:   "c",
1080			Major:  intptr(1),
1081			Minor:  intptr(5),
1082			Access: rwm,
1083			Allow:  true,
1084		},
1085		{
1086			// "/dev/urandom",
1087			Type:   "c",
1088			Major:  intptr(1),
1089			Minor:  intptr(9),
1090			Access: rwm,
1091			Allow:  true,
1092		},
1093		{
1094			// "/dev/console",
1095			Type:   "c",
1096			Major:  intptr(5),
1097			Minor:  intptr(1),
1098			Access: rwm,
1099			Allow:  true,
1100		},
1101		// /dev/pts/ - pts namespaces are "coming soon"
1102		{
1103			Type:   "c",
1104			Major:  intptr(136),
1105			Access: rwm,
1106			Allow:  true,
1107		},
1108		{
1109			Type:   "c",
1110			Major:  intptr(5),
1111			Minor:  intptr(2),
1112			Access: rwm,
1113			Allow:  true,
1114		},
1115		{
1116			// tuntap
1117			Type:   "c",
1118			Major:  intptr(10),
1119			Minor:  intptr(200),
1120			Access: rwm,
1121			Allow:  true,
1122		},
1123	}...)
1124	return nil
1125}
1126
// WithPrivileged sets up options for a privileged container.
//
// It is the composition of the options listed below. Compose applies them
// in the order given and stops at the first one that returns an error.
var WithPrivileged = Compose(
	WithAllCapabilities,
	WithMaskedPaths(nil),
	WithReadonlyPaths(nil),
	// Turn "ro" into "rw" on sysfs and cgroup mounts.
	WithWriteableSysfs,
	WithWriteableCgroupfs,
	// Set an empty SELinux label and AppArmor profile on the process.
	WithSelinuxLabel(""),
	WithApparmorProfile(""),
	// Drop the seccomp profile.
	WithSeccompUnconfined,
)
1138
1139// WithWindowsHyperV sets the Windows.HyperV section for HyperV isolation of containers.
1140func WithWindowsHyperV(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1141	if s.Windows == nil {
1142		s.Windows = &specs.Windows{}
1143	}
1144	if s.Windows.HyperV == nil {
1145		s.Windows.HyperV = &specs.WindowsHyperV{}
1146	}
1147	return nil
1148}
1149
1150// WithMemoryLimit sets the `Linux.LinuxResources.Memory.Limit` section to the
1151// `limit` specified if the `Linux` section is not `nil`. Additionally sets the
1152// `Windows.WindowsResources.Memory.Limit` section if the `Windows` section is
1153// not `nil`.
1154func WithMemoryLimit(limit uint64) SpecOpts {
1155	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1156		if s.Linux != nil {
1157			if s.Linux.Resources == nil {
1158				s.Linux.Resources = &specs.LinuxResources{}
1159			}
1160			if s.Linux.Resources.Memory == nil {
1161				s.Linux.Resources.Memory = &specs.LinuxMemory{}
1162			}
1163			l := int64(limit)
1164			s.Linux.Resources.Memory.Limit = &l
1165		}
1166		if s.Windows != nil {
1167			if s.Windows.Resources == nil {
1168				s.Windows.Resources = &specs.WindowsResources{}
1169			}
1170			if s.Windows.Resources.Memory == nil {
1171				s.Windows.Resources.Memory = &specs.WindowsMemoryResources{}
1172			}
1173			s.Windows.Resources.Memory.Limit = &limit
1174		}
1175		return nil
1176	}
1177}
1178
1179// WithAnnotations appends or replaces the annotations on the spec with the
1180// provided annotations
1181func WithAnnotations(annotations map[string]string) SpecOpts {
1182	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1183		if s.Annotations == nil {
1184			s.Annotations = make(map[string]string)
1185		}
1186		for k, v := range annotations {
1187			s.Annotations[k] = v
1188		}
1189		return nil
1190	}
1191}
1192
1193// WithLinuxDevices adds the provided linux devices to the spec
1194func WithLinuxDevices(devices []specs.LinuxDevice) SpecOpts {
1195	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1196		setLinux(s)
1197		s.Linux.Devices = append(s.Linux.Devices, devices...)
1198		return nil
1199	}
1200}
1201
// ErrNotADevice is a sentinel error reporting that something is not a
// device node.
// NOTE(review): presumably returned by the device lookup helpers when the
// given path is not a device - confirm against their definitions.
var ErrNotADevice = errors.New("not a device node")
1203
1204// WithLinuxDevice adds the device specified by path to the spec
1205func WithLinuxDevice(path, permissions string) SpecOpts {
1206	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1207		setLinux(s)
1208		setResources(s)
1209
1210		dev, err := deviceFromPath(path, permissions)
1211		if err != nil {
1212			return err
1213		}
1214
1215		s.Linux.Devices = append(s.Linux.Devices, *dev)
1216
1217		s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, specs.LinuxDeviceCgroup{
1218			Type:   dev.Type,
1219			Allow:  true,
1220			Major:  &dev.Major,
1221			Minor:  &dev.Minor,
1222			Access: permissions,
1223		})
1224
1225		return nil
1226	}
1227}
1228
1229// WithEnvFile adds environment variables from a file to the container's spec
1230func WithEnvFile(path string) SpecOpts {
1231	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1232		var vars []string
1233		f, err := os.Open(path)
1234		if err != nil {
1235			return err
1236		}
1237		defer f.Close()
1238
1239		sc := bufio.NewScanner(f)
1240		for sc.Scan() {
1241			vars = append(vars, sc.Text())
1242		}
1243		if err = sc.Err(); err != nil {
1244			return err
1245		}
1246		return WithEnv(vars)(nil, nil, nil, s)
1247	}
1248}
1249
// ErrNoShmMount is returned when there is no /dev/shm mount specified in the
// config and a SpecOpts tried to set a configuration value on that mount.
var ErrNoShmMount = errors.New("no /dev/shm mount specified")
1253
1254// WithDevShmSize sets the size of the /dev/shm mount for the container.
1255//
1256// The size value is specified in kb, kilobytes.
1257func WithDevShmSize(kb int64) SpecOpts {
1258	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
1259		for _, m := range s.Mounts {
1260			if m.Source == "shm" && m.Type == "tmpfs" {
1261				for i, o := range m.Options {
1262					if strings.HasPrefix(o, "size=") {
1263						m.Options[i] = fmt.Sprintf("size=%dk", kb)
1264						return nil
1265					}
1266				}
1267				m.Options = append(m.Options, fmt.Sprintf("size=%dk", kb))
1268				return nil
1269			}
1270		}
1271		return ErrNoShmMount
1272	}
1273}
1274