1// +build linux
2
3/*
4   Copyright The containerd Authors.
5
6   Licensed under the Apache License, Version 2.0 (the "License");
7   you may not use this file except in compliance with the License.
8   You may obtain a copy of the License at
9
10       http://www.apache.org/licenses/LICENSE-2.0
11
12   Unless required by applicable law or agreed to in writing, software
13   distributed under the License is distributed on an "AS IS" BASIS,
14   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   See the License for the specific language governing permissions and
16   limitations under the License.
17*/
18
19package linux
20
21import (
22	"context"
23	"fmt"
24	"io"
25	"io/ioutil"
26	"os"
27	"path/filepath"
28	"time"
29
30	eventstypes "github.com/containerd/containerd/api/events"
31	"github.com/containerd/containerd/api/types"
32	"github.com/containerd/containerd/containers"
33	"github.com/containerd/containerd/errdefs"
34	"github.com/containerd/containerd/events/exchange"
35	"github.com/containerd/containerd/identifiers"
36	"github.com/containerd/containerd/log"
37	"github.com/containerd/containerd/metadata"
38	"github.com/containerd/containerd/mount"
39	"github.com/containerd/containerd/namespaces"
40	"github.com/containerd/containerd/pkg/process"
41	"github.com/containerd/containerd/platforms"
42	"github.com/containerd/containerd/plugin"
43	"github.com/containerd/containerd/runtime"
44	"github.com/containerd/containerd/runtime/linux/runctypes"
45	v1 "github.com/containerd/containerd/runtime/v1"
46	shim "github.com/containerd/containerd/runtime/v1/shim/v1"
47	runc "github.com/containerd/go-runc"
48	"github.com/containerd/typeurl"
49	ptypes "github.com/gogo/protobuf/types"
50	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
51	"github.com/pkg/errors"
52	"github.com/sirupsen/logrus"
53	"golang.org/x/sys/unix"
54)
55
// Package-level values.
//
// pluginID is the identifier reported by Runtime.ID: the runtime plugin type
// and the plugin name "linux" joined with a dot.
//
// empty is a pre-allocated protobuf Empty message. It is not referenced in
// this part of the file; presumably other files of the package use it.
var (
	pluginID = fmt.Sprintf("%s.%s", plugin.RuntimePlugin, "linux")
	empty    = &ptypes.Empty{}
)
60
const (
	// defaultRuntime and defaultShim are the binary names placed in the
	// plugin's default Config at registration time. configFilename is not
	// referenced in this part of the file; presumably it is the name of the
	// OCI spec file inside a bundle (TODO confirm against the bundle code).
	configFilename = "config.json"
	defaultRuntime = "runc"
	defaultShim    = "containerd-shim"

	// cleanupTimeout is the default timeout for cleanup operations, such as
	// killing the shim after a failed task creation.
	cleanupTimeout = 1 * time.Minute
)
69
70func init() {
71	plugin.Register(&plugin.Registration{
72		Type:   plugin.RuntimePlugin,
73		ID:     "linux",
74		InitFn: New,
75		Requires: []plugin.Type{
76			plugin.MetadataPlugin,
77		},
78		Config: &Config{
79			Shim:    defaultShim,
80			Runtime: defaultRuntime,
81		},
82	})
83}
84
85var _ = (runtime.PlatformRuntime)(&Runtime{})
86
// Config holds the options for the linux runtime plugin. Field values are
// read from the plugin's TOML configuration using the keys in the struct tags.
type Config struct {
	// Shim is a path or name of binary implementing the Shim GRPC API
	Shim string `toml:"shim"`
	// Runtime is a path or name of an OCI runtime used by the shim
	Runtime string `toml:"runtime"`
	// RuntimeRoot is the path that shall be used by the OCI runtime for its data
	RuntimeRoot string `toml:"runtime_root"`
	// NoShim runs the shim in-process (calling runc directly from within
	// this package) instead of starting a separate shim process
	NoShim bool `toml:"no_shim"`
	// ShimDebug enables debug on the shim. In this file it also forwards
	// restored shims' stdout/stderr logs to the daemon's own stdout/stderr,
	// turns on runc's Debug flag, and keeps the bundle when terminating a
	// task fails during dead-shim cleanup.
	ShimDebug bool `toml:"shim_debug"`
}
100
101// New returns a configured runtime
102func New(ic *plugin.InitContext) (interface{}, error) {
103	ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
104
105	if err := os.MkdirAll(ic.Root, 0711); err != nil {
106		return nil, err
107	}
108	if err := os.MkdirAll(ic.State, 0711); err != nil {
109		return nil, err
110	}
111	m, err := ic.Get(plugin.MetadataPlugin)
112	if err != nil {
113		return nil, err
114	}
115	cfg := ic.Config.(*Config)
116	r := &Runtime{
117		root:       ic.Root,
118		state:      ic.State,
119		tasks:      runtime.NewTaskList(),
120		containers: metadata.NewContainerStore(m.(*metadata.DB)),
121		address:    ic.Address,
122		events:     ic.Events,
123		config:     cfg,
124	}
125	tasks, err := r.restoreTasks(ic.Context)
126	if err != nil {
127		return nil, err
128	}
129	for _, t := range tasks {
130		if err := r.tasks.AddWithNamespace(t.namespace, t); err != nil {
131			return nil, err
132		}
133	}
134	return r, nil
135}
136
// Runtime for a linux based system
type Runtime struct {
	// root and state are the plugin's root and state directories, taken
	// from the init context; per-namespace and per-task paths are built
	// beneath them. address is the init context's address and is handed to
	// remote shims when they are started.
	root    string
	state   string
	address string

	// tasks is the list of tasks known to this runtime, containers is the
	// metadata-backed container store used to look up runtime options, and
	// events is the exchange on which task events are published.
	tasks      *runtime.TaskList
	containers containers.Store
	events     *exchange.Exchange

	// config is the plugin configuration this runtime was created with.
	config *Config
}
149
// ID returns the plugin identifier of the runtime (the package-level
// pluginID value).
func (r *Runtime) ID() string {
	return pluginID
}
154
155// Create a new task
156func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
157	namespace, err := namespaces.NamespaceRequired(ctx)
158	if err != nil {
159		return nil, err
160	}
161
162	if err := identifiers.Validate(id); err != nil {
163		return nil, errors.Wrapf(err, "invalid task id")
164	}
165
166	ropts, err := r.getRuncOptions(ctx, id)
167	if err != nil {
168		return nil, err
169	}
170
171	bundle, err := newBundle(id,
172		filepath.Join(r.state, namespace),
173		filepath.Join(r.root, namespace),
174		opts.Spec.Value)
175	if err != nil {
176		return nil, err
177	}
178	defer func() {
179		if err != nil {
180			bundle.Delete()
181		}
182	}()
183
184	shimopt := ShimLocal(r.config, r.events)
185	if !r.config.NoShim {
186		var cgroup string
187		if opts.TaskOptions != nil {
188			v, err := typeurl.UnmarshalAny(opts.TaskOptions)
189			if err != nil {
190				return nil, err
191			}
192			cgroup = v.(*runctypes.CreateOptions).ShimCgroup
193		}
194		exitHandler := func() {
195			log.G(ctx).WithField("id", id).Info("shim reaped")
196
197			if _, err := r.tasks.Get(ctx, id); err != nil {
198				// Task was never started or was already successfully deleted
199				return
200			}
201
202			if err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id); err != nil {
203				log.G(ctx).WithError(err).WithFields(logrus.Fields{
204					"id":        id,
205					"namespace": namespace,
206				}).Warn("failed to clean up after killed shim")
207			}
208		}
209		shimopt = ShimRemote(r.config, r.address, cgroup, exitHandler)
210	}
211
212	s, err := bundle.NewShimClient(ctx, namespace, shimopt, ropts)
213	if err != nil {
214		return nil, err
215	}
216	defer func() {
217		if err != nil {
218			deferCtx, deferCancel := context.WithTimeout(
219				namespaces.WithNamespace(context.TODO(), namespace), cleanupTimeout)
220			defer deferCancel()
221			if kerr := s.KillShim(deferCtx); kerr != nil {
222				log.G(ctx).WithError(kerr).Error("failed to kill shim")
223			}
224		}
225	}()
226
227	rt := r.config.Runtime
228	if ropts != nil && ropts.Runtime != "" {
229		rt = ropts.Runtime
230	}
231	sopts := &shim.CreateTaskRequest{
232		ID:         id,
233		Bundle:     bundle.path,
234		Runtime:    rt,
235		Stdin:      opts.IO.Stdin,
236		Stdout:     opts.IO.Stdout,
237		Stderr:     opts.IO.Stderr,
238		Terminal:   opts.IO.Terminal,
239		Checkpoint: opts.Checkpoint,
240		Options:    opts.TaskOptions,
241	}
242	for _, m := range opts.Rootfs {
243		sopts.Rootfs = append(sopts.Rootfs, &types.Mount{
244			Type:    m.Type,
245			Source:  m.Source,
246			Options: m.Options,
247		})
248	}
249	cr, err := s.Create(ctx, sopts)
250	if err != nil {
251		return nil, errdefs.FromGRPC(err)
252	}
253	t, err := newTask(id, namespace, int(cr.Pid), s, r.events, r.tasks, bundle)
254	if err != nil {
255		return nil, err
256	}
257	if err := r.tasks.Add(ctx, t); err != nil {
258		return nil, err
259	}
260	r.events.Publish(ctx, runtime.TaskCreateEventTopic, &eventstypes.TaskCreate{
261		ContainerID: sopts.ID,
262		Bundle:      sopts.Bundle,
263		Rootfs:      sopts.Rootfs,
264		IO: &eventstypes.TaskIO{
265			Stdin:    sopts.Stdin,
266			Stdout:   sopts.Stdout,
267			Stderr:   sopts.Stderr,
268			Terminal: sopts.Terminal,
269		},
270		Checkpoint: sopts.Checkpoint,
271		Pid:        uint32(t.pid),
272	})
273
274	return t, nil
275}
276
// Tasks returns the tasks known to the runtime. The ctx and the all flag are
// passed through unchanged to the task list's GetAll.
func (r *Runtime) Tasks(ctx context.Context, all bool) ([]runtime.Task, error) {
	return r.tasks.GetAll(ctx, all)
}
281
282func (r *Runtime) restoreTasks(ctx context.Context) ([]*Task, error) {
283	dir, err := ioutil.ReadDir(r.state)
284	if err != nil {
285		return nil, err
286	}
287	var o []*Task
288	for _, namespace := range dir {
289		if !namespace.IsDir() {
290			continue
291		}
292		name := namespace.Name()
293		// skip hidden directories
294		if len(name) > 0 && name[0] == '.' {
295			continue
296		}
297		log.G(ctx).WithField("namespace", name).Debug("loading tasks in namespace")
298		tasks, err := r.loadTasks(ctx, name)
299		if err != nil {
300			return nil, err
301		}
302		o = append(o, tasks...)
303	}
304	return o, nil
305}
306
// Get returns the task with the given id by looking it up in the runtime's
// task list; ctx is passed through to that lookup.
func (r *Runtime) Get(ctx context.Context, id string) (runtime.Task, error) {
	return r.tasks.Get(ctx, id)
}
311
// Add registers an existing task with the runtime's task list and returns
// the list's error, if any.
func (r *Runtime) Add(ctx context.Context, task runtime.Task) error {
	return r.tasks.Add(ctx, task)
}
316
// Delete removes the task with the given id from the runtime's task list.
// It only delegates to the task list's Delete; nothing else is cleaned up
// here.
func (r *Runtime) Delete(ctx context.Context, id string) {
	r.tasks.Delete(ctx, id)
}
321
322func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
323	dir, err := ioutil.ReadDir(filepath.Join(r.state, ns))
324	if err != nil {
325		return nil, err
326	}
327	var o []*Task
328	for _, path := range dir {
329		if !path.IsDir() {
330			continue
331		}
332		id := path.Name()
333		// skip hidden directories
334		if len(id) > 0 && id[0] == '.' {
335			continue
336		}
337		bundle := loadBundle(
338			id,
339			filepath.Join(r.state, ns, id),
340			filepath.Join(r.root, ns, id),
341		)
342		ctx = namespaces.WithNamespace(ctx, ns)
343		pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile))
344		shimExit := make(chan struct{})
345		s, err := bundle.NewShimClient(ctx, ns, ShimConnect(r.config, func() {
346			defer close(shimExit)
347			if _, err := r.tasks.Get(ctx, id); err != nil {
348				// Task was never started or was already successfully deleted
349				return
350			}
351
352			if err := r.cleanupAfterDeadShim(ctx, bundle, ns, id); err != nil {
353				log.G(ctx).WithError(err).WithField("bundle", bundle.path).
354					Error("cleaning up after dead shim")
355			}
356		}), nil)
357		if err != nil {
358			log.G(ctx).WithError(err).WithFields(logrus.Fields{
359				"id":        id,
360				"namespace": ns,
361			}).Error("connecting to shim")
362			err := r.cleanupAfterDeadShim(ctx, bundle, ns, id)
363			if err != nil {
364				log.G(ctx).WithError(err).WithField("bundle", bundle.path).
365					Error("cleaning up after dead shim")
366			}
367			continue
368		}
369
370		logDirPath := filepath.Join(r.root, ns, id)
371
372		copyAndClose := func(dst io.Writer, src io.ReadWriteCloser) {
373			copyDone := make(chan struct{})
374			go func() {
375				io.Copy(dst, src)
376				close(copyDone)
377			}()
378			select {
379			case <-shimExit:
380			case <-copyDone:
381			}
382			src.Close()
383		}
384		shimStdoutLog, err := v1.OpenShimStdoutLog(ctx, logDirPath)
385		if err != nil {
386			log.G(ctx).WithError(err).WithFields(logrus.Fields{
387				"id":         id,
388				"namespace":  ns,
389				"logDirPath": logDirPath,
390			}).Error("opening shim stdout log pipe")
391			continue
392		}
393		if r.config.ShimDebug {
394			go copyAndClose(os.Stdout, shimStdoutLog)
395		} else {
396			go copyAndClose(ioutil.Discard, shimStdoutLog)
397		}
398
399		shimStderrLog, err := v1.OpenShimStderrLog(ctx, logDirPath)
400		if err != nil {
401			log.G(ctx).WithError(err).WithFields(logrus.Fields{
402				"id":         id,
403				"namespace":  ns,
404				"logDirPath": logDirPath,
405			}).Error("opening shim stderr log pipe")
406			continue
407		}
408		if r.config.ShimDebug {
409			go copyAndClose(os.Stderr, shimStderrLog)
410		} else {
411			go copyAndClose(ioutil.Discard, shimStderrLog)
412		}
413
414		t, err := newTask(id, ns, pid, s, r.events, r.tasks, bundle)
415		if err != nil {
416			log.G(ctx).WithError(err).Error("loading task type")
417			continue
418		}
419		o = append(o, t)
420	}
421	return o, nil
422}
423
424func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, id string) error {
425	log.G(ctx).WithFields(logrus.Fields{
426		"id":        id,
427		"namespace": ns,
428	}).Warn("cleaning up after shim dead")
429
430	pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile))
431	ctx = namespaces.WithNamespace(ctx, ns)
432	if err := r.terminate(ctx, bundle, ns, id); err != nil {
433		if r.config.ShimDebug {
434			return errors.Wrap(err, "failed to terminate task, leaving bundle for debugging")
435		}
436		log.G(ctx).WithError(err).Warn("failed to terminate task")
437	}
438
439	// Notify Client
440	exitedAt := time.Now().UTC()
441	r.events.Publish(ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{
442		ContainerID: id,
443		ID:          id,
444		Pid:         uint32(pid),
445		ExitStatus:  128 + uint32(unix.SIGKILL),
446		ExitedAt:    exitedAt,
447	})
448
449	r.tasks.Delete(ctx, id)
450	if err := bundle.Delete(); err != nil {
451		log.G(ctx).WithError(err).Error("delete bundle")
452	}
453	// kill shim
454	if shimPid, err := runc.ReadPidFile(filepath.Join(bundle.path, "shim.pid")); err == nil && shimPid > 0 {
455		unix.Kill(shimPid, unix.SIGKILL)
456	}
457
458	r.events.Publish(ctx, runtime.TaskDeleteEventTopic, &eventstypes.TaskDelete{
459		ContainerID: id,
460		Pid:         uint32(pid),
461		ExitStatus:  128 + uint32(unix.SIGKILL),
462		ExitedAt:    exitedAt,
463	})
464
465	return nil
466}
467
468func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string) error {
469	rt, err := r.getRuntime(ctx, ns, id)
470	if err != nil {
471		return err
472	}
473	if err := rt.Delete(ctx, id, &runc.DeleteOpts{
474		Force: true,
475	}); err != nil {
476		log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
477	}
478	if err := mount.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {
479		log.G(ctx).WithError(err).WithFields(logrus.Fields{
480			"path": bundle.path,
481			"id":   id,
482		}).Warnf("unmount task rootfs")
483	}
484	return nil
485}
486
487func (r *Runtime) getRuntime(ctx context.Context, ns, id string) (*runc.Runc, error) {
488	ropts, err := r.getRuncOptions(ctx, id)
489	if err != nil {
490		return nil, err
491	}
492
493	var (
494		cmd  = r.config.Runtime
495		root = process.RuncRoot
496	)
497	if ropts != nil {
498		if ropts.Runtime != "" {
499			cmd = ropts.Runtime
500		}
501		if ropts.RuntimeRoot != "" {
502			root = ropts.RuntimeRoot
503		}
504	}
505
506	return &runc.Runc{
507		Command:      cmd,
508		LogFormat:    runc.JSON,
509		PdeathSignal: unix.SIGKILL,
510		Root:         filepath.Join(root, ns),
511		Debug:        r.config.ShimDebug,
512	}, nil
513}
514
515func (r *Runtime) getRuncOptions(ctx context.Context, id string) (*runctypes.RuncOptions, error) {
516	container, err := r.containers.Get(ctx, id)
517	if err != nil {
518		return nil, err
519	}
520
521	if container.Runtime.Options != nil {
522		v, err := typeurl.UnmarshalAny(container.Runtime.Options)
523		if err != nil {
524			return nil, err
525		}
526		ropts, ok := v.(*runctypes.RuncOptions)
527		if !ok {
528			return nil, errors.New("invalid runtime options format")
529		}
530
531		return ropts, nil
532	}
533	return &runctypes.RuncOptions{}, nil
534}
535