1/*
2   Copyright The containerd Authors.
3
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15*/
16
17package oci
18
19import (
20	"bufio"
21	"context"
22	"encoding/json"
23	"fmt"
24	"io/ioutil"
25	"os"
26	"path/filepath"
27	"strconv"
28	"strings"
29
30	"github.com/containerd/containerd/containers"
31	"github.com/containerd/containerd/content"
32	"github.com/containerd/containerd/images"
33	"github.com/containerd/containerd/mount"
34	"github.com/containerd/containerd/namespaces"
35	"github.com/containerd/containerd/platforms"
36	"github.com/containerd/continuity/fs"
37	v1 "github.com/opencontainers/image-spec/specs-go/v1"
38	"github.com/opencontainers/runc/libcontainer/user"
39	specs "github.com/opencontainers/runtime-spec/specs-go"
40	"github.com/pkg/errors"
41)
42
// SpecOpts sets spec specific information to a newly generated OCI spec.
//
// An option receives the request context, the client, the container the spec
// belongs to and the spec to modify. Returning a non-nil error signals that
// the option could not be applied.
type SpecOpts func(context.Context, Client, *containers.Container, *Spec) error
45
46// Compose converts a sequence of spec operations into a single operation
47func Compose(opts ...SpecOpts) SpecOpts {
48	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
49		for _, o := range opts {
50			if err := o(ctx, client, c, s); err != nil {
51				return err
52			}
53		}
54		return nil
55	}
56}
57
58// setProcess sets Process to empty if unset
59func setProcess(s *Spec) {
60	if s.Process == nil {
61		s.Process = &specs.Process{}
62	}
63}
64
65// setRoot sets Root to empty if unset
66func setRoot(s *Spec) {
67	if s.Root == nil {
68		s.Root = &specs.Root{}
69	}
70}
71
72// setLinux sets Linux to empty if unset
73func setLinux(s *Spec) {
74	if s.Linux == nil {
75		s.Linux = &specs.Linux{}
76	}
77}
78
79// nolint
80func setResources(s *Spec) {
81	if s.Linux != nil {
82		if s.Linux.Resources == nil {
83			s.Linux.Resources = &specs.LinuxResources{}
84		}
85	}
86	if s.Windows != nil {
87		if s.Windows.Resources == nil {
88			s.Windows.Resources = &specs.WindowsResources{}
89		}
90	}
91}
92
93// nolint
94func setCPU(s *Spec) {
95	setResources(s)
96	if s.Linux != nil {
97		if s.Linux.Resources.CPU == nil {
98			s.Linux.Resources.CPU = &specs.LinuxCPU{}
99		}
100	}
101	if s.Windows != nil {
102		if s.Windows.Resources.CPU == nil {
103			s.Windows.Resources.CPU = &specs.WindowsCPUResources{}
104		}
105	}
106}
107
108// setCapabilities sets Linux Capabilities to empty if unset
109func setCapabilities(s *Spec) {
110	setProcess(s)
111	if s.Process.Capabilities == nil {
112		s.Process.Capabilities = &specs.LinuxCapabilities{}
113	}
114}
115
116// WithDefaultSpec returns a SpecOpts that will populate the spec with default
117// values.
118//
119// Use as the first option to clear the spec, then apply options afterwards.
120func WithDefaultSpec() SpecOpts {
121	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
122		return generateDefaultSpecWithPlatform(ctx, platforms.DefaultString(), c.ID, s)
123	}
124}
125
126// WithDefaultSpecForPlatform returns a SpecOpts that will populate the spec
127// with default values for a given platform.
128//
129// Use as the first option to clear the spec, then apply options afterwards.
130func WithDefaultSpecForPlatform(platform string) SpecOpts {
131	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
132		return generateDefaultSpecWithPlatform(ctx, platform, c.ID, s)
133	}
134}
135
136// WithSpecFromBytes loads the spec from the provided byte slice.
137func WithSpecFromBytes(p []byte) SpecOpts {
138	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
139		*s = Spec{} // make sure spec is cleared.
140		if err := json.Unmarshal(p, s); err != nil {
141			return errors.Wrapf(err, "decoding spec config file failed, current supported OCI runtime-spec : v%s", specs.Version)
142		}
143		return nil
144	}
145}
146
147// WithSpecFromFile loads the specification from the provided filename.
148func WithSpecFromFile(filename string) SpecOpts {
149	return func(ctx context.Context, c Client, container *containers.Container, s *Spec) error {
150		p, err := ioutil.ReadFile(filename)
151		if err != nil {
152			return errors.Wrap(err, "cannot load spec config file")
153		}
154		return WithSpecFromBytes(p)(ctx, c, container, s)
155	}
156}
157
158// WithEnv appends environment variables
159func WithEnv(environmentVariables []string) SpecOpts {
160	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
161		if len(environmentVariables) > 0 {
162			setProcess(s)
163			s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, environmentVariables)
164		}
165		return nil
166	}
167}
168
169// WithDefaultPathEnv sets the $PATH environment variable to the
170// default PATH defined in this package.
171func WithDefaultPathEnv(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
172	s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, defaultUnixEnv)
173	return nil
174}
175
// replaceOrAppendEnvValues returns the defaults with the overrides either
// replaced by env key or appended to the list.
//
// Each override is handled in order:
//   - "KEY=value" replaces the entry for KEY if one exists, otherwise it is
//     appended;
//   - "KEY" (no "=") removes the entry for KEY if one exists.
//
// Keys introduced by the overrides are tracked as well, so a key that occurs
// several times in overrides ends up in the result once, with its last value,
// and can be unset by a later override. Neither input slice is modified.
// Empty entries are dropped from the result.
func replaceOrAppendEnvValues(defaults, overrides []string) []string {
	index := make(map[string]int, len(defaults)+len(overrides))
	results := make([]string, 0, len(defaults)+len(overrides))
	for i, e := range defaults {
		key := e
		if eq := strings.IndexByte(e, '='); eq >= 0 {
			key = e[:eq]
		}
		results = append(results, e)
		index[key] = i
	}

	for _, value := range overrides {
		eq := strings.IndexByte(value, '=')
		if eq < 0 {
			// Values w/o = means they want this env to be removed/unset.
			if i, exists := index[value]; exists {
				results[i] = "" // Used to indicate it should be removed
			}
			continue
		}

		// Just do a normal set/update
		key := value[:eq]
		if i, exists := index[key]; exists {
			results[i] = value
			continue
		}
		results = append(results, value)
		// Remember where the new key lives so later overrides of the same
		// key update it instead of appending a duplicate.
		index[key] = len(results) - 1
	}

	// Now remove all entries that we want to "unset", filtering in place in
	// a single pass.
	kept := results[:0]
	for _, e := range results {
		if e != "" {
			kept = append(kept, e)
		}
	}
	return kept
}
215
216// WithProcessArgs replaces the args on the generated spec
217func WithProcessArgs(args ...string) SpecOpts {
218	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
219		setProcess(s)
220		s.Process.Args = args
221		return nil
222	}
223}
224
225// WithProcessCwd replaces the current working directory on the generated spec
226func WithProcessCwd(cwd string) SpecOpts {
227	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
228		setProcess(s)
229		s.Process.Cwd = cwd
230		return nil
231	}
232}
233
234// WithTTY sets the information on the spec as well as the environment variables for
235// using a TTY
236func WithTTY(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
237	setProcess(s)
238	s.Process.Terminal = true
239	if s.Linux != nil {
240		s.Process.Env = append(s.Process.Env, "TERM=xterm")
241	}
242
243	return nil
244}
245
246// WithTTYSize sets the information on the spec as well as the environment variables for
247// using a TTY
248func WithTTYSize(width, height int) SpecOpts {
249	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
250		setProcess(s)
251		if s.Process.ConsoleSize == nil {
252			s.Process.ConsoleSize = &specs.Box{}
253		}
254		s.Process.ConsoleSize.Width = uint(width)
255		s.Process.ConsoleSize.Height = uint(height)
256		return nil
257	}
258}
259
260// WithHostname sets the container's hostname
261func WithHostname(name string) SpecOpts {
262	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
263		s.Hostname = name
264		return nil
265	}
266}
267
268// WithMounts appends mounts
269func WithMounts(mounts []specs.Mount) SpecOpts {
270	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
271		s.Mounts = append(s.Mounts, mounts...)
272		return nil
273	}
274}
275
276// WithoutMounts removes mounts
277func WithoutMounts(dests ...string) SpecOpts {
278	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
279		var (
280			mounts  []specs.Mount
281			current = s.Mounts
282		)
283	mLoop:
284		for _, m := range current {
285			mDestination := filepath.Clean(m.Destination)
286			for _, dest := range dests {
287				if mDestination == dest {
288					continue mLoop
289				}
290			}
291			mounts = append(mounts, m)
292		}
293		s.Mounts = mounts
294		return nil
295	}
296}
297
298// WithHostNamespace allows a task to run inside the host's linux namespace
299func WithHostNamespace(ns specs.LinuxNamespaceType) SpecOpts {
300	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
301		setLinux(s)
302		for i, n := range s.Linux.Namespaces {
303			if n.Type == ns {
304				s.Linux.Namespaces = append(s.Linux.Namespaces[:i], s.Linux.Namespaces[i+1:]...)
305				return nil
306			}
307		}
308		return nil
309	}
310}
311
312// WithLinuxNamespace uses the passed in namespace for the spec. If a namespace of the same type already exists in the
313// spec, the existing namespace is replaced by the one provided.
314func WithLinuxNamespace(ns specs.LinuxNamespace) SpecOpts {
315	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
316		setLinux(s)
317		for i, n := range s.Linux.Namespaces {
318			if n.Type == ns.Type {
319				s.Linux.Namespaces[i] = ns
320				return nil
321			}
322		}
323		s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
324		return nil
325	}
326}
327
328// WithNewPrivileges turns off the NoNewPrivileges feature flag in the spec
329func WithNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
330	setProcess(s)
331	s.Process.NoNewPrivileges = false
332
333	return nil
334}
335
336// WithImageConfig configures the spec to from the configuration of an Image
337func WithImageConfig(image Image) SpecOpts {
338	return WithImageConfigArgs(image, nil)
339}
340
// WithImageConfigArgs configures the spec from the configuration of an Image, with additional args that
// replace the CMD of the image.
//
// The image config blob is read from the image's content store and decoded;
// only the OCI image config and Docker schema2 config media types are
// accepted. The process environment, arguments, working directory and user
// are then filled in, with separate handling for specs that have a Linux
// section and specs that have a Windows section. An error is returned when
// the spec has neither.
func WithImageConfigArgs(image Image, args []string) SpecOpts {
	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
		ic, err := image.Config(ctx)
		if err != nil {
			return err
		}
		var (
			ociimage v1.Image
			config   v1.ImageConfig
		)
		switch ic.MediaType {
		case v1.MediaTypeImageConfig, images.MediaTypeDockerSchema2Config:
			p, err := content.ReadBlob(ctx, image.ContentStore(), ic)
			if err != nil {
				return err
			}

			if err := json.Unmarshal(p, &ociimage); err != nil {
				return err
			}
			config = ociimage.Config
		default:
			return fmt.Errorf("unknown image config media type %s", ic.MediaType)
		}

		setProcess(s)
		if s.Linux != nil {
			// The image environment (or the package default when the image
			// defines none) is the base; entries already in the spec act as
			// overrides on top of it.
			defaults := config.Env
			if len(defaults) == 0 {
				defaults = defaultUnixEnv
			}
			s.Process.Env = replaceOrAppendEnvValues(defaults, s.Process.Env)
			// Caller-supplied args replace the image CMD but never the
			// ENTRYPOINT.
			cmd := config.Cmd
			if len(args) > 0 {
				cmd = args
			}
			s.Process.Args = append(config.Entrypoint, cmd...)

			// Fall back to "/" when the image does not set a working directory.
			cwd := config.WorkingDir
			if cwd == "" {
				cwd = "/"
			}
			s.Process.Cwd = cwd
			if config.User != "" {
				if err := WithUser(config.User)(ctx, client, c, s); err != nil {
					return err
				}
				// Look up supplemental groups using the UID WithUser just resolved.
				return WithAdditionalGIDs(fmt.Sprintf("%d", s.Process.User.UID))(ctx, client, c, s)
			}
			// we should query the image's /etc/group for additional GIDs
			// even if there is no specified user in the image config
			return WithAdditionalGIDs("root")(ctx, client, c, s)
		} else if s.Windows != nil {
			// No default environment or working directory is substituted on
			// Windows; the image values are used as they are.
			s.Process.Env = replaceOrAppendEnvValues(config.Env, s.Process.Env)
			cmd := config.Cmd
			if len(args) > 0 {
				cmd = args
			}
			s.Process.Args = append(config.Entrypoint, cmd...)

			s.Process.Cwd = config.WorkingDir
			// The user is recorded by name only; no lookup is performed here.
			s.Process.User = specs.User{
				Username: config.User,
			}
		} else {
			return errors.New("spec does not contain Linux or Windows section")
		}
		return nil
	}
}
413
414// WithRootFSPath specifies unmanaged rootfs path.
415func WithRootFSPath(path string) SpecOpts {
416	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
417		setRoot(s)
418		s.Root.Path = path
419		// Entrypoint is not set here (it's up to caller)
420		return nil
421	}
422}
423
424// WithRootFSReadonly sets specs.Root.Readonly to true
425func WithRootFSReadonly() SpecOpts {
426	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
427		setRoot(s)
428		s.Root.Readonly = true
429		return nil
430	}
431}
432
433// WithNoNewPrivileges sets no_new_privileges on the process for the container
434func WithNoNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
435	setProcess(s)
436	s.Process.NoNewPrivileges = true
437	return nil
438}
439
440// WithHostHostsFile bind-mounts the host's /etc/hosts into the container as readonly
441func WithHostHostsFile(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
442	s.Mounts = append(s.Mounts, specs.Mount{
443		Destination: "/etc/hosts",
444		Type:        "bind",
445		Source:      "/etc/hosts",
446		Options:     []string{"rbind", "ro"},
447	})
448	return nil
449}
450
451// WithHostResolvconf bind-mounts the host's /etc/resolv.conf into the container as readonly
452func WithHostResolvconf(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
453	s.Mounts = append(s.Mounts, specs.Mount{
454		Destination: "/etc/resolv.conf",
455		Type:        "bind",
456		Source:      "/etc/resolv.conf",
457		Options:     []string{"rbind", "ro"},
458	})
459	return nil
460}
461
462// WithHostLocaltime bind-mounts the host's /etc/localtime into the container as readonly
463func WithHostLocaltime(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
464	s.Mounts = append(s.Mounts, specs.Mount{
465		Destination: "/etc/localtime",
466		Type:        "bind",
467		Source:      "/etc/localtime",
468		Options:     []string{"rbind", "ro"},
469	})
470	return nil
471}
472
473// WithUserNamespace sets the uid and gid mappings for the task
474// this can be called multiple times to add more mappings to the generated spec
475func WithUserNamespace(uidMap, gidMap []specs.LinuxIDMapping) SpecOpts {
476	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
477		var hasUserns bool
478		setLinux(s)
479		for _, ns := range s.Linux.Namespaces {
480			if ns.Type == specs.UserNamespace {
481				hasUserns = true
482				break
483			}
484		}
485		if !hasUserns {
486			s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{
487				Type: specs.UserNamespace,
488			})
489		}
490		s.Linux.UIDMappings = append(s.Linux.UIDMappings, uidMap...)
491		s.Linux.GIDMappings = append(s.Linux.GIDMappings, gidMap...)
492		return nil
493	}
494}
495
496// WithCgroup sets the container's cgroup path
497func WithCgroup(path string) SpecOpts {
498	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
499		setLinux(s)
500		s.Linux.CgroupsPath = path
501		return nil
502	}
503}
504
505// WithNamespacedCgroup uses the namespace set on the context to create a
506// root directory for containers in the cgroup with the id as the subcgroup
507func WithNamespacedCgroup() SpecOpts {
508	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
509		namespace, err := namespaces.NamespaceRequired(ctx)
510		if err != nil {
511			return err
512		}
513		setLinux(s)
514		s.Linux.CgroupsPath = filepath.Join("/", namespace, c.ID)
515		return nil
516	}
517}
518
519// WithUser sets the user to be used within the container.
520// It accepts a valid user string in OCI Image Spec v1.0.0:
521//   user, uid, user:group, uid:gid, uid:group, user:gid
522func WithUser(userstr string) SpecOpts {
523	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
524		setProcess(s)
525		parts := strings.Split(userstr, ":")
526		switch len(parts) {
527		case 1:
528			v, err := strconv.Atoi(parts[0])
529			if err != nil {
530				// if we cannot parse as a uint they try to see if it is a username
531				return WithUsername(userstr)(ctx, client, c, s)
532			}
533			return WithUserID(uint32(v))(ctx, client, c, s)
534		case 2:
535			var (
536				username  string
537				groupname string
538			)
539			var uid, gid uint32
540			v, err := strconv.Atoi(parts[0])
541			if err != nil {
542				username = parts[0]
543			} else {
544				uid = uint32(v)
545			}
546			if v, err = strconv.Atoi(parts[1]); err != nil {
547				groupname = parts[1]
548			} else {
549				gid = uint32(v)
550			}
551			if username == "" && groupname == "" {
552				s.Process.User.UID, s.Process.User.GID = uid, gid
553				return nil
554			}
555			f := func(root string) error {
556				if username != "" {
557					user, err := UserFromPath(root, func(u user.User) bool {
558						return u.Name == username
559					})
560					if err != nil {
561						return err
562					}
563					uid = uint32(user.Uid)
564				}
565				if groupname != "" {
566					gid, err = GIDFromPath(root, func(g user.Group) bool {
567						return g.Name == groupname
568					})
569					if err != nil {
570						return err
571					}
572				}
573				s.Process.User.UID, s.Process.User.GID = uid, gid
574				return nil
575			}
576			if c.Snapshotter == "" && c.SnapshotKey == "" {
577				if !isRootfsAbs(s.Root.Path) {
578					return errors.New("rootfs absolute path is required")
579				}
580				return f(s.Root.Path)
581			}
582			if c.Snapshotter == "" {
583				return errors.New("no snapshotter set for container")
584			}
585			if c.SnapshotKey == "" {
586				return errors.New("rootfs snapshot not created for container")
587			}
588			snapshotter := client.SnapshotService(c.Snapshotter)
589			mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
590			if err != nil {
591				return err
592			}
593			return mount.WithTempMount(ctx, mounts, f)
594		default:
595			return fmt.Errorf("invalid USER value %s", userstr)
596		}
597	}
598}
599
600// WithUIDGID allows the UID and GID for the Process to be set
601func WithUIDGID(uid, gid uint32) SpecOpts {
602	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
603		setProcess(s)
604		s.Process.User.UID = uid
605		s.Process.User.GID = gid
606		return nil
607	}
608}
609
610// WithUserID sets the correct UID and GID for the container based
611// on the image's /etc/passwd contents. If /etc/passwd does not exist,
612// or uid is not found in /etc/passwd, it sets the requested uid,
613// additionally sets the gid to 0, and does not return an error.
614func WithUserID(uid uint32) SpecOpts {
615	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
616		setProcess(s)
617		if c.Snapshotter == "" && c.SnapshotKey == "" {
618			if !isRootfsAbs(s.Root.Path) {
619				return errors.Errorf("rootfs absolute path is required")
620			}
621			user, err := UserFromPath(s.Root.Path, func(u user.User) bool {
622				return u.Uid == int(uid)
623			})
624			if err != nil {
625				if os.IsNotExist(err) || err == ErrNoUsersFound {
626					s.Process.User.UID, s.Process.User.GID = uid, 0
627					return nil
628				}
629				return err
630			}
631			s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
632			return nil
633
634		}
635		if c.Snapshotter == "" {
636			return errors.Errorf("no snapshotter set for container")
637		}
638		if c.SnapshotKey == "" {
639			return errors.Errorf("rootfs snapshot not created for container")
640		}
641		snapshotter := client.SnapshotService(c.Snapshotter)
642		mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
643		if err != nil {
644			return err
645		}
646		return mount.WithTempMount(ctx, mounts, func(root string) error {
647			user, err := UserFromPath(root, func(u user.User) bool {
648				return u.Uid == int(uid)
649			})
650			if err != nil {
651				if os.IsNotExist(err) || err == ErrNoUsersFound {
652					s.Process.User.UID, s.Process.User.GID = uid, 0
653					return nil
654				}
655				return err
656			}
657			s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
658			return nil
659		})
660	}
661}
662
663// WithUsername sets the correct UID and GID for the container
664// based on the image's /etc/passwd contents. If /etc/passwd
665// does not exist, or the username is not found in /etc/passwd,
666// it returns error.
667func WithUsername(username string) SpecOpts {
668	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
669		setProcess(s)
670		if s.Linux != nil {
671			if c.Snapshotter == "" && c.SnapshotKey == "" {
672				if !isRootfsAbs(s.Root.Path) {
673					return errors.Errorf("rootfs absolute path is required")
674				}
675				user, err := UserFromPath(s.Root.Path, func(u user.User) bool {
676					return u.Name == username
677				})
678				if err != nil {
679					return err
680				}
681				s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
682				return nil
683			}
684			if c.Snapshotter == "" {
685				return errors.Errorf("no snapshotter set for container")
686			}
687			if c.SnapshotKey == "" {
688				return errors.Errorf("rootfs snapshot not created for container")
689			}
690			snapshotter := client.SnapshotService(c.Snapshotter)
691			mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
692			if err != nil {
693				return err
694			}
695			return mount.WithTempMount(ctx, mounts, func(root string) error {
696				user, err := UserFromPath(root, func(u user.User) bool {
697					return u.Name == username
698				})
699				if err != nil {
700					return err
701				}
702				s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
703				return nil
704			})
705		} else if s.Windows != nil {
706			s.Process.User.Username = username
707		} else {
708			return errors.New("spec does not contain Linux or Windows section")
709		}
710		return nil
711	}
712}
713
714// WithAdditionalGIDs sets the OCI spec's additionalGids array to any additional groups listed
715// for a particular user in the /etc/groups file of the image's root filesystem
716// The passed in user can be either a uid or a username.
717func WithAdditionalGIDs(userstr string) SpecOpts {
718	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
719		// For LCOW additional GID's not supported
720		if s.Windows != nil {
721			return nil
722		}
723		setProcess(s)
724		setAdditionalGids := func(root string) error {
725			var username string
726			uid, err := strconv.Atoi(userstr)
727			if err == nil {
728				user, err := UserFromPath(root, func(u user.User) bool {
729					return u.Uid == uid
730				})
731				if err != nil {
732					if os.IsNotExist(err) || err == ErrNoUsersFound {
733						return nil
734					}
735					return err
736				}
737				username = user.Name
738			} else {
739				username = userstr
740			}
741			gids, err := getSupplementalGroupsFromPath(root, func(g user.Group) bool {
742				// we only want supplemental groups
743				if g.Name == username {
744					return false
745				}
746				for _, entry := range g.List {
747					if entry == username {
748						return true
749					}
750				}
751				return false
752			})
753			if err != nil {
754				if os.IsNotExist(err) {
755					return nil
756				}
757				return err
758			}
759			s.Process.User.AdditionalGids = gids
760			return nil
761		}
762		if c.Snapshotter == "" && c.SnapshotKey == "" {
763			if !isRootfsAbs(s.Root.Path) {
764				return errors.Errorf("rootfs absolute path is required")
765			}
766			return setAdditionalGids(s.Root.Path)
767		}
768		if c.Snapshotter == "" {
769			return errors.Errorf("no snapshotter set for container")
770		}
771		if c.SnapshotKey == "" {
772			return errors.Errorf("rootfs snapshot not created for container")
773		}
774		snapshotter := client.SnapshotService(c.Snapshotter)
775		mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
776		if err != nil {
777			return err
778		}
779		return mount.WithTempMount(ctx, mounts, setAdditionalGids)
780	}
781}
782
783// WithCapabilities sets Linux capabilities on the process
784func WithCapabilities(caps []string) SpecOpts {
785	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
786		setCapabilities(s)
787
788		s.Process.Capabilities.Bounding = caps
789		s.Process.Capabilities.Effective = caps
790		s.Process.Capabilities.Permitted = caps
791		s.Process.Capabilities.Inheritable = caps
792
793		return nil
794	}
795}
796
// capsContain reports whether s is an element of caps. The comparison is an
// exact string match.
func capsContain(caps []string, s string) bool {
	for i := range caps {
		if caps[i] == s {
			return true
		}
	}
	return false
}
805
// removeCap replaces *caps with a newly built slice holding every element
// that is not equal to s, in the original order. The result is nil when no
// element is kept.
func removeCap(caps *[]string, s string) {
	var kept []string
	for i := range *caps {
		if name := (*caps)[i]; name != s {
			kept = append(kept, name)
		}
	}
	*caps = kept
}
816
817// WithAddedCapabilities adds the provided capabilities
818func WithAddedCapabilities(caps []string) SpecOpts {
819	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
820		setCapabilities(s)
821		for _, c := range caps {
822			for _, cl := range []*[]string{
823				&s.Process.Capabilities.Bounding,
824				&s.Process.Capabilities.Effective,
825				&s.Process.Capabilities.Permitted,
826				&s.Process.Capabilities.Inheritable,
827			} {
828				if !capsContain(*cl, c) {
829					*cl = append(*cl, c)
830				}
831			}
832		}
833		return nil
834	}
835}
836
837// WithDroppedCapabilities removes the provided capabilities
838func WithDroppedCapabilities(caps []string) SpecOpts {
839	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
840		setCapabilities(s)
841		for _, c := range caps {
842			for _, cl := range []*[]string{
843				&s.Process.Capabilities.Bounding,
844				&s.Process.Capabilities.Effective,
845				&s.Process.Capabilities.Permitted,
846				&s.Process.Capabilities.Inheritable,
847			} {
848				removeCap(cl, c)
849			}
850		}
851		return nil
852	}
853}
854
855// WithAmbientCapabilities set the Linux ambient capabilities for the process
856// Ambient capabilities should only be set for non-root users or the caller should
857// understand how these capabilities are used and set
858func WithAmbientCapabilities(caps []string) SpecOpts {
859	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
860		setCapabilities(s)
861
862		s.Process.Capabilities.Ambient = caps
863		return nil
864	}
865}
866
867// ErrNoUsersFound can be returned from UserFromPath
868var ErrNoUsersFound = errors.New("no users found")
869
870// UserFromPath inspects the user object using /etc/passwd in the specified rootfs.
871// filter can be nil.
872func UserFromPath(root string, filter func(user.User) bool) (user.User, error) {
873	ppath, err := fs.RootPath(root, "/etc/passwd")
874	if err != nil {
875		return user.User{}, err
876	}
877	users, err := user.ParsePasswdFileFilter(ppath, filter)
878	if err != nil {
879		return user.User{}, err
880	}
881	if len(users) == 0 {
882		return user.User{}, ErrNoUsersFound
883	}
884	return users[0], nil
885}
886
887// ErrNoGroupsFound can be returned from GIDFromPath
888var ErrNoGroupsFound = errors.New("no groups found")
889
890// GIDFromPath inspects the GID using /etc/passwd in the specified rootfs.
891// filter can be nil.
892func GIDFromPath(root string, filter func(user.Group) bool) (gid uint32, err error) {
893	gpath, err := fs.RootPath(root, "/etc/group")
894	if err != nil {
895		return 0, err
896	}
897	groups, err := user.ParseGroupFileFilter(gpath, filter)
898	if err != nil {
899		return 0, err
900	}
901	if len(groups) == 0 {
902		return 0, ErrNoGroupsFound
903	}
904	g := groups[0]
905	return uint32(g.Gid), nil
906}
907
908func getSupplementalGroupsFromPath(root string, filter func(user.Group) bool) ([]uint32, error) {
909	gpath, err := fs.RootPath(root, "/etc/group")
910	if err != nil {
911		return []uint32{}, err
912	}
913	groups, err := user.ParseGroupFileFilter(gpath, filter)
914	if err != nil {
915		return []uint32{}, err
916	}
917	if len(groups) == 0 {
918		// if there are no additional groups; just return an empty set
919		return []uint32{}, nil
920	}
921	addlGids := []uint32{}
922	for _, grp := range groups {
923		addlGids = append(addlGids, uint32(grp.Gid))
924	}
925	return addlGids, nil
926}
927
// isRootfsAbs reports whether root is an absolute path according to
// filepath.IsAbs. The path is not checked for existence.
func isRootfsAbs(root string) bool {
	return filepath.IsAbs(root)
}
931
932// WithMaskedPaths sets the masked paths option
933func WithMaskedPaths(paths []string) SpecOpts {
934	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
935		setLinux(s)
936		s.Linux.MaskedPaths = paths
937		return nil
938	}
939}
940
941// WithReadonlyPaths sets the read only paths option
942func WithReadonlyPaths(paths []string) SpecOpts {
943	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
944		setLinux(s)
945		s.Linux.ReadonlyPaths = paths
946		return nil
947	}
948}
949
950// WithWriteableSysfs makes any sysfs mounts writeable
951func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
952	for _, m := range s.Mounts {
953		if m.Type == "sysfs" {
954			for i, o := range m.Options {
955				if o == "ro" {
956					m.Options[i] = "rw"
957				}
958			}
959		}
960	}
961	return nil
962}
963
964// WithWriteableCgroupfs makes any cgroup mounts writeable
965func WithWriteableCgroupfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
966	for _, m := range s.Mounts {
967		if m.Type == "cgroup" {
968			for i, o := range m.Options {
969				if o == "ro" {
970					m.Options[i] = "rw"
971				}
972			}
973		}
974	}
975	return nil
976}
977
978// WithSelinuxLabel sets the process SELinux label
979func WithSelinuxLabel(label string) SpecOpts {
980	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
981		setProcess(s)
982		s.Process.SelinuxLabel = label
983		return nil
984	}
985}
986
987// WithApparmorProfile sets the Apparmor profile for the process
988func WithApparmorProfile(profile string) SpecOpts {
989	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
990		setProcess(s)
991		s.Process.ApparmorProfile = profile
992		return nil
993	}
994}
995
996// WithSeccompUnconfined clears the seccomp profile
997func WithSeccompUnconfined(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
998	setLinux(s)
999	s.Linux.Seccomp = nil
1000	return nil
1001}
1002
1003// WithParentCgroupDevices uses the default cgroup setup to inherit the container's parent cgroup's
1004// allowed and denied devices
1005func WithParentCgroupDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1006	setLinux(s)
1007	if s.Linux.Resources == nil {
1008		s.Linux.Resources = &specs.LinuxResources{}
1009	}
1010	s.Linux.Resources.Devices = nil
1011	return nil
1012}
1013
1014// WithAllDevicesAllowed permits READ WRITE MKNOD on all devices nodes for the container
1015func WithAllDevicesAllowed(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1016	setLinux(s)
1017	if s.Linux.Resources == nil {
1018		s.Linux.Resources = &specs.LinuxResources{}
1019	}
1020	s.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{
1021		{
1022			Allow:  true,
1023			Access: rwm,
1024		},
1025	}
1026	return nil
1027}
1028
1029// WithDefaultUnixDevices adds the default devices for unix such as /dev/null, /dev/random to
1030// the container's resource cgroup spec
1031func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1032	setLinux(s)
1033	if s.Linux.Resources == nil {
1034		s.Linux.Resources = &specs.LinuxResources{}
1035	}
1036	intptr := func(i int64) *int64 {
1037		return &i
1038	}
1039	s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, []specs.LinuxDeviceCgroup{
1040		{
1041			// "/dev/null",
1042			Type:   "c",
1043			Major:  intptr(1),
1044			Minor:  intptr(3),
1045			Access: rwm,
1046			Allow:  true,
1047		},
1048		{
1049			// "/dev/random",
1050			Type:   "c",
1051			Major:  intptr(1),
1052			Minor:  intptr(8),
1053			Access: rwm,
1054			Allow:  true,
1055		},
1056		{
1057			// "/dev/full",
1058			Type:   "c",
1059			Major:  intptr(1),
1060			Minor:  intptr(7),
1061			Access: rwm,
1062			Allow:  true,
1063		},
1064		{
1065			// "/dev/tty",
1066			Type:   "c",
1067			Major:  intptr(5),
1068			Minor:  intptr(0),
1069			Access: rwm,
1070			Allow:  true,
1071		},
1072		{
1073			// "/dev/zero",
1074			Type:   "c",
1075			Major:  intptr(1),
1076			Minor:  intptr(5),
1077			Access: rwm,
1078			Allow:  true,
1079		},
1080		{
1081			// "/dev/urandom",
1082			Type:   "c",
1083			Major:  intptr(1),
1084			Minor:  intptr(9),
1085			Access: rwm,
1086			Allow:  true,
1087		},
1088		{
1089			// "/dev/console",
1090			Type:   "c",
1091			Major:  intptr(5),
1092			Minor:  intptr(1),
1093			Access: rwm,
1094			Allow:  true,
1095		},
1096		// /dev/pts/ - pts namespaces are "coming soon"
1097		{
1098			Type:   "c",
1099			Major:  intptr(136),
1100			Access: rwm,
1101			Allow:  true,
1102		},
1103		{
1104			Type:   "c",
1105			Major:  intptr(5),
1106			Minor:  intptr(2),
1107			Access: rwm,
1108			Allow:  true,
1109		},
1110		{
1111			// tuntap
1112			Type:   "c",
1113			Major:  intptr(10),
1114			Minor:  intptr(200),
1115			Access: rwm,
1116			Allow:  true,
1117		},
1118	}...)
1119	return nil
1120}
1121
// WithPrivileged sets up options for a privileged container. The options
// below are applied in order; Compose returns the first error it encounters
// and skips the remaining options.
var WithPrivileged = Compose(
	// Capability setup (defined elsewhere in this package).
	WithAllCurrentCapabilities,
	// nil is passed for both path lists — presumably clearing the masked and
	// read-only paths; confirm against WithMaskedPaths / WithReadonlyPaths.
	WithMaskedPaths(nil),
	WithReadonlyPaths(nil),
	// Flip "ro" to "rw" on sysfs and cgroup mounts.
	WithWriteableSysfs,
	WithWriteableCgroupfs,
	// Set an empty SELinux label and an empty AppArmor profile.
	WithSelinuxLabel(""),
	WithApparmorProfile(""),
	// Remove the seccomp profile.
	WithSeccompUnconfined,
)
1133
1134// WithWindowsHyperV sets the Windows.HyperV section for HyperV isolation of containers.
1135func WithWindowsHyperV(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1136	if s.Windows == nil {
1137		s.Windows = &specs.Windows{}
1138	}
1139	if s.Windows.HyperV == nil {
1140		s.Windows.HyperV = &specs.WindowsHyperV{}
1141	}
1142	return nil
1143}
1144
1145// WithMemoryLimit sets the `Linux.LinuxResources.Memory.Limit` section to the
1146// `limit` specified if the `Linux` section is not `nil`. Additionally sets the
1147// `Windows.WindowsResources.Memory.Limit` section if the `Windows` section is
1148// not `nil`.
1149func WithMemoryLimit(limit uint64) SpecOpts {
1150	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1151		if s.Linux != nil {
1152			if s.Linux.Resources == nil {
1153				s.Linux.Resources = &specs.LinuxResources{}
1154			}
1155			if s.Linux.Resources.Memory == nil {
1156				s.Linux.Resources.Memory = &specs.LinuxMemory{}
1157			}
1158			l := int64(limit)
1159			s.Linux.Resources.Memory.Limit = &l
1160		}
1161		if s.Windows != nil {
1162			if s.Windows.Resources == nil {
1163				s.Windows.Resources = &specs.WindowsResources{}
1164			}
1165			if s.Windows.Resources.Memory == nil {
1166				s.Windows.Resources.Memory = &specs.WindowsMemoryResources{}
1167			}
1168			s.Windows.Resources.Memory.Limit = &limit
1169		}
1170		return nil
1171	}
1172}
1173
1174// WithAnnotations appends or replaces the annotations on the spec with the
1175// provided annotations
1176func WithAnnotations(annotations map[string]string) SpecOpts {
1177	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1178		if s.Annotations == nil {
1179			s.Annotations = make(map[string]string)
1180		}
1181		for k, v := range annotations {
1182			s.Annotations[k] = v
1183		}
1184		return nil
1185	}
1186}
1187
1188// WithLinuxDevices adds the provided linux devices to the spec
1189func WithLinuxDevices(devices []specs.LinuxDevice) SpecOpts {
1190	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1191		setLinux(s)
1192		s.Linux.Devices = append(s.Linux.Devices, devices...)
1193		return nil
1194	}
1195}
1196
1197// WithLinuxDevice adds the device specified by path to the spec
1198func WithLinuxDevice(path, permissions string) SpecOpts {
1199	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1200		setLinux(s)
1201		setResources(s)
1202
1203		dev, err := deviceFromPath(path)
1204		if err != nil {
1205			return err
1206		}
1207
1208		s.Linux.Devices = append(s.Linux.Devices, *dev)
1209
1210		s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, specs.LinuxDeviceCgroup{
1211			Type:   dev.Type,
1212			Allow:  true,
1213			Major:  &dev.Major,
1214			Minor:  &dev.Minor,
1215			Access: permissions,
1216		})
1217
1218		return nil
1219	}
1220}
1221
1222// WithEnvFile adds environment variables from a file to the container's spec
1223func WithEnvFile(path string) SpecOpts {
1224	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1225		var vars []string
1226		f, err := os.Open(path)
1227		if err != nil {
1228			return err
1229		}
1230		defer f.Close()
1231
1232		sc := bufio.NewScanner(f)
1233		for sc.Scan() {
1234			vars = append(vars, sc.Text())
1235		}
1236		if err = sc.Err(); err != nil {
1237			return err
1238		}
1239		return WithEnv(vars)(nil, nil, nil, s)
1240	}
1241}
1242
// ErrNoShmMount is returned when there is no /dev/shm mount specified in the
// config and a SpecOpts (such as WithDevShmSize) tried to set a configuration
// value on that mount.
var ErrNoShmMount = errors.New("no /dev/shm mount specified")
1246
1247// WithDevShmSize sets the size of the /dev/shm mount for the container.
1248//
1249// The size value is specified in kb, kilobytes.
1250func WithDevShmSize(kb int64) SpecOpts {
1251	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
1252		for _, m := range s.Mounts {
1253			if m.Source == "shm" && m.Type == "tmpfs" {
1254				for i, o := range m.Options {
1255					if strings.HasPrefix(o, "size=") {
1256						m.Options[i] = fmt.Sprintf("size=%dk", kb)
1257						return nil
1258					}
1259				}
1260				m.Options = append(m.Options, fmt.Sprintf("size=%dk", kb))
1261				return nil
1262			}
1263		}
1264		return ErrNoShmMount
1265	}
1266}
1267