1/*
2   Copyright The containerd Authors.
3
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15*/
16
17package oci
18
19import (
20	"bufio"
21	"context"
22	"encoding/json"
23	"fmt"
24	"io/ioutil"
25	"os"
26	"path/filepath"
27	"strconv"
28	"strings"
29
30	"github.com/containerd/containerd/containers"
31	"github.com/containerd/containerd/content"
32	"github.com/containerd/containerd/images"
33	"github.com/containerd/containerd/mount"
34	"github.com/containerd/containerd/namespaces"
35	"github.com/containerd/containerd/platforms"
36	"github.com/containerd/continuity/fs"
37	v1 "github.com/opencontainers/image-spec/specs-go/v1"
38	"github.com/opencontainers/runc/libcontainer/user"
39	specs "github.com/opencontainers/runtime-spec/specs-go"
40	"github.com/pkg/errors"
41	"github.com/syndtr/gocapability/capability"
42)
43
44// SpecOpts sets spec specific information to a newly generated OCI spec
45type SpecOpts func(context.Context, Client, *containers.Container, *Spec) error
46
47// Compose converts a sequence of spec operations into a single operation
48func Compose(opts ...SpecOpts) SpecOpts {
49	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
50		for _, o := range opts {
51			if err := o(ctx, client, c, s); err != nil {
52				return err
53			}
54		}
55		return nil
56	}
57}
58
59// setProcess sets Process to empty if unset
60func setProcess(s *Spec) {
61	if s.Process == nil {
62		s.Process = &specs.Process{}
63	}
64}
65
66// setRoot sets Root to empty if unset
67func setRoot(s *Spec) {
68	if s.Root == nil {
69		s.Root = &specs.Root{}
70	}
71}
72
73// setLinux sets Linux to empty if unset
74func setLinux(s *Spec) {
75	if s.Linux == nil {
76		s.Linux = &specs.Linux{}
77	}
78}
79
80// nolint
81func setResources(s *Spec) {
82	if s.Linux != nil {
83		if s.Linux.Resources == nil {
84			s.Linux.Resources = &specs.LinuxResources{}
85		}
86	}
87	if s.Windows != nil {
88		if s.Windows.Resources == nil {
89			s.Windows.Resources = &specs.WindowsResources{}
90		}
91	}
92}
93
94// setCapabilities sets Linux Capabilities to empty if unset
95func setCapabilities(s *Spec) {
96	setProcess(s)
97	if s.Process.Capabilities == nil {
98		s.Process.Capabilities = &specs.LinuxCapabilities{}
99	}
100}
101
102// WithDefaultSpec returns a SpecOpts that will populate the spec with default
103// values.
104//
105// Use as the first option to clear the spec, then apply options afterwards.
106func WithDefaultSpec() SpecOpts {
107	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
108		return generateDefaultSpecWithPlatform(ctx, platforms.DefaultString(), c.ID, s)
109	}
110}
111
112// WithDefaultSpecForPlatform returns a SpecOpts that will populate the spec
113// with default values for a given platform.
114//
115// Use as the first option to clear the spec, then apply options afterwards.
116func WithDefaultSpecForPlatform(platform string) SpecOpts {
117	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
118		return generateDefaultSpecWithPlatform(ctx, platform, c.ID, s)
119	}
120}
121
122// WithSpecFromBytes loads the spec from the provided byte slice.
123func WithSpecFromBytes(p []byte) SpecOpts {
124	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
125		*s = Spec{} // make sure spec is cleared.
126		if err := json.Unmarshal(p, s); err != nil {
127			return errors.Wrapf(err, "decoding spec config file failed, current supported OCI runtime-spec : v%s", specs.Version)
128		}
129		return nil
130	}
131}
132
133// WithSpecFromFile loads the specification from the provided filename.
134func WithSpecFromFile(filename string) SpecOpts {
135	return func(ctx context.Context, c Client, container *containers.Container, s *Spec) error {
136		p, err := ioutil.ReadFile(filename)
137		if err != nil {
138			return errors.Wrap(err, "cannot load spec config file")
139		}
140		return WithSpecFromBytes(p)(ctx, c, container, s)
141	}
142}
143
144// WithEnv appends environment variables
145func WithEnv(environmentVariables []string) SpecOpts {
146	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
147		if len(environmentVariables) > 0 {
148			setProcess(s)
149			s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, environmentVariables)
150		}
151		return nil
152	}
153}
154
155// WithDefaultPathEnv sets the $PATH environment variable to the
156// default PATH defined in this package.
157func WithDefaultPathEnv(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
158	s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, defaultUnixEnv)
159	return nil
160}
161
// replaceOrAppendEnvValues returns a new slice holding defaults with overrides
// applied in order. Neither input slice is modified.
//
// An override of the form KEY=VALUE replaces the entry for KEY when one is
// already present (from defaults or from an earlier override) and is appended
// otherwise. An override without "=" is a bare key and removes the entry for
// that key. Entries that end up empty are dropped from the result.
func replaceOrAppendEnvValues(defaults, overrides []string) []string {
	// index maps an environment key to its position in results.
	index := make(map[string]int, len(defaults)+len(overrides))
	results := make([]string, 0, len(defaults)+len(overrides))
	for i, e := range defaults {
		results = append(results, e)
		index[strings.SplitN(e, "=", 2)[0]] = i
	}

	for _, value := range overrides {
		eq := strings.IndexByte(value, '=')
		if eq < 0 {
			// Values w/o = means they want this env to be removed/unset.
			if i, exists := index[value]; exists {
				results[i] = "" // marked for removal below
			}
			continue
		}

		key := value[:eq]
		if i, exists := index[key]; exists {
			results[i] = value
			continue
		}
		// Record the position of the new entry so that a later override for
		// the same key replaces (or unsets) it instead of adding a duplicate.
		index[key] = len(results)
		results = append(results, value)
	}

	// Drop every entry marked for removal in a single in-place pass.
	kept := results[:0]
	for _, e := range results {
		if e != "" {
			kept = append(kept, e)
		}
	}
	return kept
}
201
202// WithProcessArgs replaces the args on the generated spec
203func WithProcessArgs(args ...string) SpecOpts {
204	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
205		setProcess(s)
206		s.Process.Args = args
207		return nil
208	}
209}
210
211// WithProcessCwd replaces the current working directory on the generated spec
212func WithProcessCwd(cwd string) SpecOpts {
213	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
214		setProcess(s)
215		s.Process.Cwd = cwd
216		return nil
217	}
218}
219
220// WithTTY sets the information on the spec as well as the environment variables for
221// using a TTY
222func WithTTY(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
223	setProcess(s)
224	s.Process.Terminal = true
225	if s.Linux != nil {
226		s.Process.Env = append(s.Process.Env, "TERM=xterm")
227	}
228
229	return nil
230}
231
232// WithTTYSize sets the information on the spec as well as the environment variables for
233// using a TTY
234func WithTTYSize(width, height int) SpecOpts {
235	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
236		setProcess(s)
237		if s.Process.ConsoleSize == nil {
238			s.Process.ConsoleSize = &specs.Box{}
239		}
240		s.Process.ConsoleSize.Width = uint(width)
241		s.Process.ConsoleSize.Height = uint(height)
242		return nil
243	}
244}
245
246// WithHostname sets the container's hostname
247func WithHostname(name string) SpecOpts {
248	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
249		s.Hostname = name
250		return nil
251	}
252}
253
254// WithMounts appends mounts
255func WithMounts(mounts []specs.Mount) SpecOpts {
256	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
257		s.Mounts = append(s.Mounts, mounts...)
258		return nil
259	}
260}
261
262// WithHostNamespace allows a task to run inside the host's linux namespace
263func WithHostNamespace(ns specs.LinuxNamespaceType) SpecOpts {
264	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
265		setLinux(s)
266		for i, n := range s.Linux.Namespaces {
267			if n.Type == ns {
268				s.Linux.Namespaces = append(s.Linux.Namespaces[:i], s.Linux.Namespaces[i+1:]...)
269				return nil
270			}
271		}
272		return nil
273	}
274}
275
276// WithLinuxNamespace uses the passed in namespace for the spec. If a namespace of the same type already exists in the
277// spec, the existing namespace is replaced by the one provided.
278func WithLinuxNamespace(ns specs.LinuxNamespace) SpecOpts {
279	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
280		setLinux(s)
281		for i, n := range s.Linux.Namespaces {
282			if n.Type == ns.Type {
283				before := s.Linux.Namespaces[:i]
284				after := s.Linux.Namespaces[i+1:]
285				s.Linux.Namespaces = append(before, ns)
286				s.Linux.Namespaces = append(s.Linux.Namespaces, after...)
287				return nil
288			}
289		}
290		s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
291		return nil
292	}
293}
294
295// WithNewPrivileges turns off the NoNewPrivileges feature flag in the spec
296func WithNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
297	setProcess(s)
298	s.Process.NoNewPrivileges = false
299
300	return nil
301}
302
303// WithImageConfig configures the spec to from the configuration of an Image
304func WithImageConfig(image Image) SpecOpts {
305	return WithImageConfigArgs(image, nil)
306}
307
308// WithImageConfigArgs configures the spec to from the configuration of an Image with additional args that
309// replaces the CMD of the image
310func WithImageConfigArgs(image Image, args []string) SpecOpts {
311	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
312		ic, err := image.Config(ctx)
313		if err != nil {
314			return err
315		}
316		var (
317			ociimage v1.Image
318			config   v1.ImageConfig
319		)
320		switch ic.MediaType {
321		case v1.MediaTypeImageConfig, images.MediaTypeDockerSchema2Config:
322			p, err := content.ReadBlob(ctx, image.ContentStore(), ic)
323			if err != nil {
324				return err
325			}
326
327			if err := json.Unmarshal(p, &ociimage); err != nil {
328				return err
329			}
330			config = ociimage.Config
331		default:
332			return fmt.Errorf("unknown image config media type %s", ic.MediaType)
333		}
334
335		setProcess(s)
336		if s.Linux != nil {
337			defaults := config.Env
338			if len(defaults) == 0 {
339				defaults = defaultUnixEnv
340			}
341			s.Process.Env = replaceOrAppendEnvValues(defaults, s.Process.Env)
342			cmd := config.Cmd
343			if len(args) > 0 {
344				cmd = args
345			}
346			s.Process.Args = append(config.Entrypoint, cmd...)
347
348			cwd := config.WorkingDir
349			if cwd == "" {
350				cwd = "/"
351			}
352			s.Process.Cwd = cwd
353			if config.User != "" {
354				if err := WithUser(config.User)(ctx, client, c, s); err != nil {
355					return err
356				}
357				return WithAdditionalGIDs(fmt.Sprintf("%d", s.Process.User.UID))(ctx, client, c, s)
358			}
359			// we should query the image's /etc/group for additional GIDs
360			// even if there is no specified user in the image config
361			return WithAdditionalGIDs("root")(ctx, client, c, s)
362		} else if s.Windows != nil {
363			s.Process.Env = replaceOrAppendEnvValues(config.Env, s.Process.Env)
364			cmd := config.Cmd
365			if len(args) > 0 {
366				cmd = args
367			}
368			s.Process.Args = append(config.Entrypoint, cmd...)
369
370			s.Process.Cwd = config.WorkingDir
371			s.Process.User = specs.User{
372				Username: config.User,
373			}
374		} else {
375			return errors.New("spec does not contain Linux or Windows section")
376		}
377		return nil
378	}
379}
380
381// WithRootFSPath specifies unmanaged rootfs path.
382func WithRootFSPath(path string) SpecOpts {
383	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
384		setRoot(s)
385		s.Root.Path = path
386		// Entrypoint is not set here (it's up to caller)
387		return nil
388	}
389}
390
391// WithRootFSReadonly sets specs.Root.Readonly to true
392func WithRootFSReadonly() SpecOpts {
393	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
394		setRoot(s)
395		s.Root.Readonly = true
396		return nil
397	}
398}
399
400// WithNoNewPrivileges sets no_new_privileges on the process for the container
401func WithNoNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
402	setProcess(s)
403	s.Process.NoNewPrivileges = true
404	return nil
405}
406
407// WithHostHostsFile bind-mounts the host's /etc/hosts into the container as readonly
408func WithHostHostsFile(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
409	s.Mounts = append(s.Mounts, specs.Mount{
410		Destination: "/etc/hosts",
411		Type:        "bind",
412		Source:      "/etc/hosts",
413		Options:     []string{"rbind", "ro"},
414	})
415	return nil
416}
417
418// WithHostResolvconf bind-mounts the host's /etc/resolv.conf into the container as readonly
419func WithHostResolvconf(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
420	s.Mounts = append(s.Mounts, specs.Mount{
421		Destination: "/etc/resolv.conf",
422		Type:        "bind",
423		Source:      "/etc/resolv.conf",
424		Options:     []string{"rbind", "ro"},
425	})
426	return nil
427}
428
429// WithHostLocaltime bind-mounts the host's /etc/localtime into the container as readonly
430func WithHostLocaltime(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
431	s.Mounts = append(s.Mounts, specs.Mount{
432		Destination: "/etc/localtime",
433		Type:        "bind",
434		Source:      "/etc/localtime",
435		Options:     []string{"rbind", "ro"},
436	})
437	return nil
438}
439
440// WithUserNamespace sets the uid and gid mappings for the task
441// this can be called multiple times to add more mappings to the generated spec
442func WithUserNamespace(container, host, size uint32) SpecOpts {
443	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
444		var hasUserns bool
445		setLinux(s)
446		for _, ns := range s.Linux.Namespaces {
447			if ns.Type == specs.UserNamespace {
448				hasUserns = true
449				break
450			}
451		}
452		if !hasUserns {
453			s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{
454				Type: specs.UserNamespace,
455			})
456		}
457		mapping := specs.LinuxIDMapping{
458			ContainerID: container,
459			HostID:      host,
460			Size:        size,
461		}
462		s.Linux.UIDMappings = append(s.Linux.UIDMappings, mapping)
463		s.Linux.GIDMappings = append(s.Linux.GIDMappings, mapping)
464		return nil
465	}
466}
467
468// WithCgroup sets the container's cgroup path
469func WithCgroup(path string) SpecOpts {
470	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
471		setLinux(s)
472		s.Linux.CgroupsPath = path
473		return nil
474	}
475}
476
477// WithNamespacedCgroup uses the namespace set on the context to create a
478// root directory for containers in the cgroup with the id as the subcgroup
479func WithNamespacedCgroup() SpecOpts {
480	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
481		namespace, err := namespaces.NamespaceRequired(ctx)
482		if err != nil {
483			return err
484		}
485		setLinux(s)
486		s.Linux.CgroupsPath = filepath.Join("/", namespace, c.ID)
487		return nil
488	}
489}
490
491// WithUser sets the user to be used within the container.
492// It accepts a valid user string in OCI Image Spec v1.0.0:
493//   user, uid, user:group, uid:gid, uid:group, user:gid
494func WithUser(userstr string) SpecOpts {
495	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
496		setProcess(s)
497		parts := strings.Split(userstr, ":")
498		switch len(parts) {
499		case 1:
500			v, err := strconv.Atoi(parts[0])
501			if err != nil {
502				// if we cannot parse as a uint they try to see if it is a username
503				return WithUsername(userstr)(ctx, client, c, s)
504			}
505			return WithUserID(uint32(v))(ctx, client, c, s)
506		case 2:
507			var (
508				username  string
509				groupname string
510			)
511			var uid, gid uint32
512			v, err := strconv.Atoi(parts[0])
513			if err != nil {
514				username = parts[0]
515			} else {
516				uid = uint32(v)
517			}
518			if v, err = strconv.Atoi(parts[1]); err != nil {
519				groupname = parts[1]
520			} else {
521				gid = uint32(v)
522			}
523			if username == "" && groupname == "" {
524				s.Process.User.UID, s.Process.User.GID = uid, gid
525				return nil
526			}
527			f := func(root string) error {
528				if username != "" {
529					user, err := getUserFromPath(root, func(u user.User) bool {
530						return u.Name == username
531					})
532					if err != nil {
533						return err
534					}
535					uid = uint32(user.Uid)
536				}
537				if groupname != "" {
538					gid, err = getGIDFromPath(root, func(g user.Group) bool {
539						return g.Name == groupname
540					})
541					if err != nil {
542						return err
543					}
544				}
545				s.Process.User.UID, s.Process.User.GID = uid, gid
546				return nil
547			}
548			if c.Snapshotter == "" && c.SnapshotKey == "" {
549				if !isRootfsAbs(s.Root.Path) {
550					return errors.New("rootfs absolute path is required")
551				}
552				return f(s.Root.Path)
553			}
554			if c.Snapshotter == "" {
555				return errors.New("no snapshotter set for container")
556			}
557			if c.SnapshotKey == "" {
558				return errors.New("rootfs snapshot not created for container")
559			}
560			snapshotter := client.SnapshotService(c.Snapshotter)
561			mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
562			if err != nil {
563				return err
564			}
565			return mount.WithTempMount(ctx, mounts, f)
566		default:
567			return fmt.Errorf("invalid USER value %s", userstr)
568		}
569	}
570}
571
572// WithUIDGID allows the UID and GID for the Process to be set
573func WithUIDGID(uid, gid uint32) SpecOpts {
574	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
575		setProcess(s)
576		s.Process.User.UID = uid
577		s.Process.User.GID = gid
578		return nil
579	}
580}
581
582// WithUserID sets the correct UID and GID for the container based
583// on the image's /etc/passwd contents. If /etc/passwd does not exist,
584// or uid is not found in /etc/passwd, it sets the requested uid,
585// additionally sets the gid to 0, and does not return an error.
586func WithUserID(uid uint32) SpecOpts {
587	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
588		setProcess(s)
589		if c.Snapshotter == "" && c.SnapshotKey == "" {
590			if !isRootfsAbs(s.Root.Path) {
591				return errors.Errorf("rootfs absolute path is required")
592			}
593			user, err := getUserFromPath(s.Root.Path, func(u user.User) bool {
594				return u.Uid == int(uid)
595			})
596			if err != nil {
597				if os.IsNotExist(err) || err == errNoUsersFound {
598					s.Process.User.UID, s.Process.User.GID = uid, 0
599					return nil
600				}
601				return err
602			}
603			s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
604			return nil
605
606		}
607		if c.Snapshotter == "" {
608			return errors.Errorf("no snapshotter set for container")
609		}
610		if c.SnapshotKey == "" {
611			return errors.Errorf("rootfs snapshot not created for container")
612		}
613		snapshotter := client.SnapshotService(c.Snapshotter)
614		mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
615		if err != nil {
616			return err
617		}
618		return mount.WithTempMount(ctx, mounts, func(root string) error {
619			user, err := getUserFromPath(root, func(u user.User) bool {
620				return u.Uid == int(uid)
621			})
622			if err != nil {
623				if os.IsNotExist(err) || err == errNoUsersFound {
624					s.Process.User.UID, s.Process.User.GID = uid, 0
625					return nil
626				}
627				return err
628			}
629			s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
630			return nil
631		})
632	}
633}
634
635// WithUsername sets the correct UID and GID for the container
636// based on the image's /etc/passwd contents. If /etc/passwd
637// does not exist, or the username is not found in /etc/passwd,
638// it returns error.
639func WithUsername(username string) SpecOpts {
640	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
641		setProcess(s)
642		if s.Linux != nil {
643			if c.Snapshotter == "" && c.SnapshotKey == "" {
644				if !isRootfsAbs(s.Root.Path) {
645					return errors.Errorf("rootfs absolute path is required")
646				}
647				user, err := getUserFromPath(s.Root.Path, func(u user.User) bool {
648					return u.Name == username
649				})
650				if err != nil {
651					return err
652				}
653				s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
654				return nil
655			}
656			if c.Snapshotter == "" {
657				return errors.Errorf("no snapshotter set for container")
658			}
659			if c.SnapshotKey == "" {
660				return errors.Errorf("rootfs snapshot not created for container")
661			}
662			snapshotter := client.SnapshotService(c.Snapshotter)
663			mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
664			if err != nil {
665				return err
666			}
667			return mount.WithTempMount(ctx, mounts, func(root string) error {
668				user, err := getUserFromPath(root, func(u user.User) bool {
669					return u.Name == username
670				})
671				if err != nil {
672					return err
673				}
674				s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid)
675				return nil
676			})
677		} else if s.Windows != nil {
678			s.Process.User.Username = username
679		} else {
680			return errors.New("spec does not contain Linux or Windows section")
681		}
682		return nil
683	}
684}
685
686// WithAdditionalGIDs sets the OCI spec's additionalGids array to any additional groups listed
687// for a particular user in the /etc/groups file of the image's root filesystem
688// The passed in user can be either a uid or a username.
689func WithAdditionalGIDs(userstr string) SpecOpts {
690	return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) {
691		// For LCOW additional GID's not supported
692		if s.Windows != nil {
693			return nil
694		}
695		setProcess(s)
696		setAdditionalGids := func(root string) error {
697			var username string
698			uid, err := strconv.Atoi(userstr)
699			if err == nil {
700				user, err := getUserFromPath(root, func(u user.User) bool {
701					return u.Uid == uid
702				})
703				if err != nil {
704					if os.IsNotExist(err) || err == errNoUsersFound {
705						return nil
706					}
707					return err
708				}
709				username = user.Name
710			} else {
711				username = userstr
712			}
713			gids, err := getSupplementalGroupsFromPath(root, func(g user.Group) bool {
714				// we only want supplemental groups
715				if g.Name == username {
716					return false
717				}
718				for _, entry := range g.List {
719					if entry == username {
720						return true
721					}
722				}
723				return false
724			})
725			if err != nil {
726				if os.IsNotExist(err) {
727					return nil
728				}
729				return err
730			}
731			s.Process.User.AdditionalGids = gids
732			return nil
733		}
734		if c.Snapshotter == "" && c.SnapshotKey == "" {
735			if !isRootfsAbs(s.Root.Path) {
736				return errors.Errorf("rootfs absolute path is required")
737			}
738			return setAdditionalGids(s.Root.Path)
739		}
740		if c.Snapshotter == "" {
741			return errors.Errorf("no snapshotter set for container")
742		}
743		if c.SnapshotKey == "" {
744			return errors.Errorf("rootfs snapshot not created for container")
745		}
746		snapshotter := client.SnapshotService(c.Snapshotter)
747		mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
748		if err != nil {
749			return err
750		}
751		return mount.WithTempMount(ctx, mounts, setAdditionalGids)
752	}
753}
754
755// WithCapabilities sets Linux capabilities on the process
756func WithCapabilities(caps []string) SpecOpts {
757	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
758		setCapabilities(s)
759
760		s.Process.Capabilities.Bounding = caps
761		s.Process.Capabilities.Effective = caps
762		s.Process.Capabilities.Permitted = caps
763		s.Process.Capabilities.Inheritable = caps
764
765		return nil
766	}
767}
768
769// WithAllCapabilities sets all linux capabilities for the process
770var WithAllCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
771	return WithCapabilities(GetAllCapabilities())(ctx, client, c, s)
772}
773
774// GetAllCapabilities returns all caps up to CAP_LAST_CAP
775// or CAP_BLOCK_SUSPEND on RHEL6
776func GetAllCapabilities() []string {
777	last := capability.CAP_LAST_CAP
778	// hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap
779	if last == capability.Cap(63) {
780		last = capability.CAP_BLOCK_SUSPEND
781	}
782	var caps []string
783	for _, cap := range capability.List() {
784		if cap > last {
785			continue
786		}
787		caps = append(caps, "CAP_"+strings.ToUpper(cap.String()))
788	}
789	return caps
790}
791
// capsContain reports whether s is an element of caps. The comparison is an
// exact string match.
func capsContain(caps []string, s string) bool {
	for i := range caps {
		if caps[i] == s {
			return true
		}
	}
	return false
}
800
// removeCap replaces *caps with a freshly built slice holding every element
// that is not equal to s, in the original order. The old backing array is
// not written to; when nothing is left *caps becomes nil.
func removeCap(caps *[]string, s string) {
	var kept []string
	for i := range *caps {
		if current := (*caps)[i]; current != s {
			kept = append(kept, current)
		}
	}
	*caps = kept
}
811
812// WithAddedCapabilities adds the provided capabilities
813func WithAddedCapabilities(caps []string) SpecOpts {
814	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
815		setCapabilities(s)
816		for _, c := range caps {
817			for _, cl := range []*[]string{
818				&s.Process.Capabilities.Bounding,
819				&s.Process.Capabilities.Effective,
820				&s.Process.Capabilities.Permitted,
821				&s.Process.Capabilities.Inheritable,
822			} {
823				if !capsContain(*cl, c) {
824					*cl = append(*cl, c)
825				}
826			}
827		}
828		return nil
829	}
830}
831
832// WithDroppedCapabilities removes the provided capabilities
833func WithDroppedCapabilities(caps []string) SpecOpts {
834	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
835		setCapabilities(s)
836		for _, c := range caps {
837			for _, cl := range []*[]string{
838				&s.Process.Capabilities.Bounding,
839				&s.Process.Capabilities.Effective,
840				&s.Process.Capabilities.Permitted,
841				&s.Process.Capabilities.Inheritable,
842			} {
843				removeCap(cl, c)
844			}
845		}
846		return nil
847	}
848}
849
850// WithAmbientCapabilities set the Linux ambient capabilities for the process
851// Ambient capabilities should only be set for non-root users or the caller should
852// understand how these capabilities are used and set
853func WithAmbientCapabilities(caps []string) SpecOpts {
854	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
855		setCapabilities(s)
856
857		s.Process.Capabilities.Ambient = caps
858		return nil
859	}
860}
861
862var errNoUsersFound = errors.New("no users found")
863
864func getUserFromPath(root string, filter func(user.User) bool) (user.User, error) {
865	ppath, err := fs.RootPath(root, "/etc/passwd")
866	if err != nil {
867		return user.User{}, err
868	}
869	users, err := user.ParsePasswdFileFilter(ppath, filter)
870	if err != nil {
871		return user.User{}, err
872	}
873	if len(users) == 0 {
874		return user.User{}, errNoUsersFound
875	}
876	return users[0], nil
877}
878
879var errNoGroupsFound = errors.New("no groups found")
880
881func getGIDFromPath(root string, filter func(user.Group) bool) (gid uint32, err error) {
882	gpath, err := fs.RootPath(root, "/etc/group")
883	if err != nil {
884		return 0, err
885	}
886	groups, err := user.ParseGroupFileFilter(gpath, filter)
887	if err != nil {
888		return 0, err
889	}
890	if len(groups) == 0 {
891		return 0, errNoGroupsFound
892	}
893	g := groups[0]
894	return uint32(g.Gid), nil
895}
896
897func getSupplementalGroupsFromPath(root string, filter func(user.Group) bool) ([]uint32, error) {
898	gpath, err := fs.RootPath(root, "/etc/group")
899	if err != nil {
900		return []uint32{}, err
901	}
902	groups, err := user.ParseGroupFileFilter(gpath, filter)
903	if err != nil {
904		return []uint32{}, err
905	}
906	if len(groups) == 0 {
907		// if there are no additional groups; just return an empty set
908		return []uint32{}, nil
909	}
910	addlGids := []uint32{}
911	for _, grp := range groups {
912		addlGids = append(addlGids, uint32(grp.Gid))
913	}
914	return addlGids, nil
915}
916
// isRootfsAbs reports whether root is an absolute path according to
// filepath.IsAbs.
func isRootfsAbs(root string) bool {
	absolute := filepath.IsAbs(root)
	return absolute
}
920
921// WithMaskedPaths sets the masked paths option
922func WithMaskedPaths(paths []string) SpecOpts {
923	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
924		setLinux(s)
925		s.Linux.MaskedPaths = paths
926		return nil
927	}
928}
929
930// WithReadonlyPaths sets the read only paths option
931func WithReadonlyPaths(paths []string) SpecOpts {
932	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
933		setLinux(s)
934		s.Linux.ReadonlyPaths = paths
935		return nil
936	}
937}
938
939// WithWriteableSysfs makes any sysfs mounts writeable
940func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
941	for i, m := range s.Mounts {
942		if m.Type == "sysfs" {
943			var options []string
944			for _, o := range m.Options {
945				if o == "ro" {
946					o = "rw"
947				}
948				options = append(options, o)
949			}
950			s.Mounts[i].Options = options
951		}
952	}
953	return nil
954}
955
956// WithWriteableCgroupfs makes any cgroup mounts writeable
957func WithWriteableCgroupfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
958	for i, m := range s.Mounts {
959		if m.Type == "cgroup" {
960			var options []string
961			for _, o := range m.Options {
962				if o == "ro" {
963					o = "rw"
964				}
965				options = append(options, o)
966			}
967			s.Mounts[i].Options = options
968		}
969	}
970	return nil
971}
972
973// WithSelinuxLabel sets the process SELinux label
974func WithSelinuxLabel(label string) SpecOpts {
975	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
976		setProcess(s)
977		s.Process.SelinuxLabel = label
978		return nil
979	}
980}
981
982// WithApparmorProfile sets the Apparmor profile for the process
983func WithApparmorProfile(profile string) SpecOpts {
984	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
985		setProcess(s)
986		s.Process.ApparmorProfile = profile
987		return nil
988	}
989}
990
991// WithSeccompUnconfined clears the seccomp profile
992func WithSeccompUnconfined(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
993	setLinux(s)
994	s.Linux.Seccomp = nil
995	return nil
996}
997
998// WithParentCgroupDevices uses the default cgroup setup to inherit the container's parent cgroup's
999// allowed and denied devices
1000func WithParentCgroupDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1001	setLinux(s)
1002	if s.Linux.Resources == nil {
1003		s.Linux.Resources = &specs.LinuxResources{}
1004	}
1005	s.Linux.Resources.Devices = nil
1006	return nil
1007}
1008
1009// WithDefaultUnixDevices adds the default devices for unix such as /dev/null, /dev/random to
1010// the container's resource cgroup spec
1011func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1012	setLinux(s)
1013	if s.Linux.Resources == nil {
1014		s.Linux.Resources = &specs.LinuxResources{}
1015	}
1016	intptr := func(i int64) *int64 {
1017		return &i
1018	}
1019	s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, []specs.LinuxDeviceCgroup{
1020		{
1021			// "/dev/null",
1022			Type:   "c",
1023			Major:  intptr(1),
1024			Minor:  intptr(3),
1025			Access: rwm,
1026			Allow:  true,
1027		},
1028		{
1029			// "/dev/random",
1030			Type:   "c",
1031			Major:  intptr(1),
1032			Minor:  intptr(8),
1033			Access: rwm,
1034			Allow:  true,
1035		},
1036		{
1037			// "/dev/full",
1038			Type:   "c",
1039			Major:  intptr(1),
1040			Minor:  intptr(7),
1041			Access: rwm,
1042			Allow:  true,
1043		},
1044		{
1045			// "/dev/tty",
1046			Type:   "c",
1047			Major:  intptr(5),
1048			Minor:  intptr(0),
1049			Access: rwm,
1050			Allow:  true,
1051		},
1052		{
1053			// "/dev/zero",
1054			Type:   "c",
1055			Major:  intptr(1),
1056			Minor:  intptr(5),
1057			Access: rwm,
1058			Allow:  true,
1059		},
1060		{
1061			// "/dev/urandom",
1062			Type:   "c",
1063			Major:  intptr(1),
1064			Minor:  intptr(9),
1065			Access: rwm,
1066			Allow:  true,
1067		},
1068		{
1069			// "/dev/console",
1070			Type:   "c",
1071			Major:  intptr(5),
1072			Minor:  intptr(1),
1073			Access: rwm,
1074			Allow:  true,
1075		},
1076		// /dev/pts/ - pts namespaces are "coming soon"
1077		{
1078			Type:   "c",
1079			Major:  intptr(136),
1080			Access: rwm,
1081			Allow:  true,
1082		},
1083		{
1084			Type:   "c",
1085			Major:  intptr(5),
1086			Minor:  intptr(2),
1087			Access: rwm,
1088			Allow:  true,
1089		},
1090		{
1091			// tuntap
1092			Type:   "c",
1093			Major:  intptr(10),
1094			Minor:  intptr(200),
1095			Access: rwm,
1096			Allow:  true,
1097		},
1098	}...)
1099	return nil
1100}
1101
// WithPrivileged sets up options for a privileged container.
//
// It is a single SpecOpts built with Compose, so the options below run in the
// listed order and the first error stops the sequence.
// TODO(justincormack) device handling
var WithPrivileged = Compose(
	WithAllCapabilities,
	WithMaskedPaths(nil),
	WithReadonlyPaths(nil),
	// Turn "ro" into "rw" on sysfs and cgroup mounts.
	WithWriteableSysfs,
	WithWriteableCgroupfs,
	// Empty SELinux label and AppArmor profile on the process.
	WithSelinuxLabel(""),
	WithApparmorProfile(""),
	// Drop the seccomp profile.
	WithSeccompUnconfined,
)
1114
1115// WithWindowsHyperV sets the Windows.HyperV section for HyperV isolation of containers.
1116func WithWindowsHyperV(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1117	if s.Windows == nil {
1118		s.Windows = &specs.Windows{}
1119	}
1120	if s.Windows.HyperV == nil {
1121		s.Windows.HyperV = &specs.WindowsHyperV{}
1122	}
1123	return nil
1124}
1125
1126// WithMemoryLimit sets the `Linux.LinuxResources.Memory.Limit` section to the
1127// `limit` specified if the `Linux` section is not `nil`. Additionally sets the
1128// `Windows.WindowsResources.Memory.Limit` section if the `Windows` section is
1129// not `nil`.
1130func WithMemoryLimit(limit uint64) SpecOpts {
1131	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1132		if s.Linux != nil {
1133			if s.Linux.Resources == nil {
1134				s.Linux.Resources = &specs.LinuxResources{}
1135			}
1136			if s.Linux.Resources.Memory == nil {
1137				s.Linux.Resources.Memory = &specs.LinuxMemory{}
1138			}
1139			l := int64(limit)
1140			s.Linux.Resources.Memory.Limit = &l
1141		}
1142		if s.Windows != nil {
1143			if s.Windows.Resources == nil {
1144				s.Windows.Resources = &specs.WindowsResources{}
1145			}
1146			if s.Windows.Resources.Memory == nil {
1147				s.Windows.Resources.Memory = &specs.WindowsMemoryResources{}
1148			}
1149			s.Windows.Resources.Memory.Limit = &limit
1150		}
1151		return nil
1152	}
1153}
1154
1155// WithAnnotations appends or replaces the annotations on the spec with the
1156// provided annotations
1157func WithAnnotations(annotations map[string]string) SpecOpts {
1158	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1159		if s.Annotations == nil {
1160			s.Annotations = make(map[string]string)
1161		}
1162		for k, v := range annotations {
1163			s.Annotations[k] = v
1164		}
1165		return nil
1166	}
1167}
1168
1169// WithLinuxDevices adds the provided linux devices to the spec
1170func WithLinuxDevices(devices []specs.LinuxDevice) SpecOpts {
1171	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1172		setLinux(s)
1173		s.Linux.Devices = append(s.Linux.Devices, devices...)
1174		return nil
1175	}
1176}
1177
// ErrNotADevice is the error value used to signal that a path does not refer
// to a device node.
var ErrNotADevice = errors.New("not a device node")
1179
1180// WithLinuxDevice adds the device specified by path to the spec
1181func WithLinuxDevice(path, permissions string) SpecOpts {
1182	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1183		setLinux(s)
1184		setResources(s)
1185
1186		dev, err := deviceFromPath(path, permissions)
1187		if err != nil {
1188			return err
1189		}
1190
1191		s.Linux.Devices = append(s.Linux.Devices, *dev)
1192
1193		s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, specs.LinuxDeviceCgroup{
1194			Type:   dev.Type,
1195			Allow:  true,
1196			Major:  &dev.Major,
1197			Minor:  &dev.Minor,
1198			Access: permissions,
1199		})
1200
1201		return nil
1202	}
1203}
1204
1205// WithEnvFile adds environment variables from a file to the container's spec
1206func WithEnvFile(path string) SpecOpts {
1207	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
1208		var vars []string
1209		f, err := os.Open(path)
1210		if err != nil {
1211			return err
1212		}
1213		defer f.Close()
1214
1215		sc := bufio.NewScanner(f)
1216		for sc.Scan() {
1217			if sc.Err() != nil {
1218				return sc.Err()
1219			}
1220			vars = append(vars, sc.Text())
1221		}
1222		return WithEnv(vars)(nil, nil, nil, s)
1223	}
1224}
1225
// ErrNoShmMount is returned when an option tries to set a configuration value
// on the /dev/shm mount but no such mount is specified in the config.
var ErrNoShmMount = errors.New("no /dev/shm mount specified")
1229
1230// WithDevShmSize sets the size of the /dev/shm mount for the container.
1231//
1232// The size value is specified in kb, kilobytes.
1233func WithDevShmSize(kb int64) SpecOpts {
1234	return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
1235		for _, m := range s.Mounts {
1236			if m.Source == "shm" && m.Type == "tmpfs" {
1237				for i, o := range m.Options {
1238					if strings.HasPrefix(o, "size=") {
1239						m.Options[i] = fmt.Sprintf("size=%dk", kb)
1240						return nil
1241					}
1242				}
1243				m.Options = append(m.Options, fmt.Sprintf("size=%dk", kb))
1244				return nil
1245			}
1246		}
1247		return ErrNoShmMount
1248	}
1249}
1250