1// +build !windows
2
3/*
4   Copyright The containerd Authors.
5
6   Licensed under the Apache License, Version 2.0 (the "License");
7   you may not use this file except in compliance with the License.
8   You may obtain a copy of the License at
9
10       http://www.apache.org/licenses/LICENSE-2.0
11
12   Unless required by applicable law or agreed to in writing, software
13   distributed under the License is distributed on an "AS IS" BASIS,
14   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   See the License for the specific language governing permissions and
16   limitations under the License.
17*/
18
19package process
20
21import (
22	"context"
23	"encoding/json"
24	"fmt"
25	"io"
26	"os"
27	"path/filepath"
28	"strings"
29	"sync"
30	"time"
31
32	"github.com/containerd/console"
33	"github.com/containerd/containerd/log"
34	"github.com/containerd/containerd/mount"
35	"github.com/containerd/containerd/pkg/stdio"
36	"github.com/containerd/fifo"
37	runc "github.com/containerd/go-runc"
38	google_protobuf "github.com/gogo/protobuf/types"
39	specs "github.com/opencontainers/runtime-spec/specs-go"
40	"github.com/pkg/errors"
41	"golang.org/x/sys/unix"
42)
43
// Init represents an initial process for a container.
//
// The exported lifecycle methods (Start, Delete, Kill, Pause, ...) take mu
// and forward to the current initState; the lower-case counterparts hold the
// actual runtime calls.
type Init struct {
	// wg is handed to the I/O copy routines started by Create and is waited
	// on (with a timeout) by delete. initState is the state object that the
	// exported, mutex-guarded methods delegate to.
	wg        sync.WaitGroup
	initState initState

	// mu is used to ensure that `Start()` and `Exited()` calls return in
	// the right order when invoked in separate goroutines.
	// This is the case within the shim implementation as it makes use of
	// the reaper interface.
	mu sync.Mutex

	// waitBlock is closed by setExited; Wait blocks until that happens.
	waitBlock chan struct{}

	// WorkDir is the parent of the default "criu-work" directory that
	// checkpoint uses when the request does not name a work directory.
	WorkDir string

	// id is the identifier passed to the OCI runtime for start, delete,
	// kill, checkpoint and update. Bundle is used to locate the pid file and
	// the "criu-dump.log" path reported on checkpoint failure. Create sets
	// console only when a terminal was requested and io only when it was not.
	id       string
	Bundle   string
	console  console.Console
	Platform stdio.Platform
	io       *processIO
	runtime  *runc.Runc
	// status and exited are written by setExited and read under mu by
	// ExitStatus and ExitedAt. closers and stdin are populated by openStdin.
	// pausing preserves the pausing state.
	pausing      *atomicBool
	status       int
	exited       time.Time
	pid          int
	closers      []io.Closer
	stdin        io.Closer
	stdio        stdio.Stdio
	Rootfs       string
	IoUID        int
	IoGID        int
	NoPivotRoot  bool
	NoNewKeyring bool
	CriuWorkPath string
}
80
81// NewRunc returns a new runc instance for a process
82func NewRunc(root, path, namespace, runtime, criu string, systemd bool) *runc.Runc {
83	if root == "" {
84		root = RuncRoot
85	}
86	return &runc.Runc{
87		Command:       runtime,
88		Log:           filepath.Join(path, "log.json"),
89		LogFormat:     runc.JSON,
90		PdeathSignal:  unix.SIGKILL,
91		Root:          filepath.Join(root, namespace),
92		Criu:          criu,
93		SystemdCgroup: systemd,
94	}
95}
96
97// New returns a new process
98func New(id string, runtime *runc.Runc, stdio stdio.Stdio) *Init {
99	p := &Init{
100		id:        id,
101		runtime:   runtime,
102		pausing:   new(atomicBool),
103		stdio:     stdio,
104		status:    0,
105		waitBlock: make(chan struct{}),
106	}
107	p.initState = &createdState{p: p}
108	return p
109}
110
111// Create the process with the provided config
112func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
113	var (
114		err     error
115		socket  *runc.Socket
116		pio     *processIO
117		pidFile = newPidFile(p.Bundle)
118	)
119
120	if r.Terminal {
121		if socket, err = runc.NewTempConsoleSocket(); err != nil {
122			return errors.Wrap(err, "failed to create OCI runtime console socket")
123		}
124		defer socket.Close()
125	} else {
126		if pio, err = createIO(ctx, p.id, p.IoUID, p.IoGID, p.stdio); err != nil {
127			return errors.Wrap(err, "failed to create init process I/O")
128		}
129		p.io = pio
130	}
131	if r.Checkpoint != "" {
132		return p.createCheckpointedState(r, pidFile)
133	}
134	opts := &runc.CreateOpts{
135		PidFile:      pidFile.Path(),
136		NoPivot:      p.NoPivotRoot,
137		NoNewKeyring: p.NoNewKeyring,
138	}
139	if p.io != nil {
140		opts.IO = p.io.IO()
141	}
142	if socket != nil {
143		opts.ConsoleSocket = socket
144	}
145	if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
146		return p.runtimeError(err, "OCI runtime create failed")
147	}
148	if r.Stdin != "" {
149		if err := p.openStdin(r.Stdin); err != nil {
150			return err
151		}
152	}
153	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
154	defer cancel()
155	if socket != nil {
156		console, err := socket.ReceiveMaster()
157		if err != nil {
158			return errors.Wrap(err, "failed to retrieve console master")
159		}
160		console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg)
161		if err != nil {
162			return errors.Wrap(err, "failed to start console copy")
163		}
164		p.console = console
165	} else {
166		if err := pio.Copy(ctx, &p.wg); err != nil {
167			return errors.Wrap(err, "failed to start io pipe copy")
168		}
169	}
170	pid, err := pidFile.Read()
171	if err != nil {
172		return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
173	}
174	p.pid = pid
175	return nil
176}
177
178func (p *Init) openStdin(path string) error {
179	sc, err := fifo.OpenFifo(context.Background(), path, unix.O_WRONLY|unix.O_NONBLOCK, 0)
180	if err != nil {
181		return errors.Wrapf(err, "failed to open stdin fifo %s", path)
182	}
183	p.stdin = sc
184	p.closers = append(p.closers, sc)
185	return nil
186}
187
188func (p *Init) createCheckpointedState(r *CreateConfig, pidFile *pidFile) error {
189	opts := &runc.RestoreOpts{
190		CheckpointOpts: runc.CheckpointOpts{
191			ImagePath:  r.Checkpoint,
192			WorkDir:    p.CriuWorkPath,
193			ParentPath: r.ParentCheckpoint,
194		},
195		PidFile:     pidFile.Path(),
196		IO:          p.io.IO(),
197		NoPivot:     p.NoPivotRoot,
198		Detach:      true,
199		NoSubreaper: true,
200	}
201	p.initState = &createdCheckpointState{
202		p:    p,
203		opts: opts,
204	}
205	return nil
206}
207
208// Wait for the process to exit
209func (p *Init) Wait() {
210	<-p.waitBlock
211}
212
213// ID of the process
214func (p *Init) ID() string {
215	return p.id
216}
217
218// Pid of the process
219func (p *Init) Pid() int {
220	return p.pid
221}
222
223// ExitStatus of the process
224func (p *Init) ExitStatus() int {
225	p.mu.Lock()
226	defer p.mu.Unlock()
227
228	return p.status
229}
230
231// ExitedAt at time when the process exited
232func (p *Init) ExitedAt() time.Time {
233	p.mu.Lock()
234	defer p.mu.Unlock()
235
236	return p.exited
237}
238
239// Status of the process
240func (p *Init) Status(ctx context.Context) (string, error) {
241	if p.pausing.get() {
242		return "pausing", nil
243	}
244
245	p.mu.Lock()
246	defer p.mu.Unlock()
247
248	return p.initState.Status(ctx)
249}
250
251// Start the init process
252func (p *Init) Start(ctx context.Context) error {
253	p.mu.Lock()
254	defer p.mu.Unlock()
255
256	return p.initState.Start(ctx)
257}
258
259func (p *Init) start(ctx context.Context) error {
260	err := p.runtime.Start(ctx, p.id)
261	return p.runtimeError(err, "OCI runtime start failed")
262}
263
264// SetExited of the init process with the next status
265func (p *Init) SetExited(status int) {
266	p.mu.Lock()
267	defer p.mu.Unlock()
268
269	p.initState.SetExited(status)
270}
271
272func (p *Init) setExited(status int) {
273	p.exited = time.Now()
274	p.status = status
275	p.Platform.ShutdownConsole(context.Background(), p.console)
276	close(p.waitBlock)
277}
278
279// Delete the init process
280func (p *Init) Delete(ctx context.Context) error {
281	p.mu.Lock()
282	defer p.mu.Unlock()
283
284	return p.initState.Delete(ctx)
285}
286
287func (p *Init) delete(ctx context.Context) error {
288	waitTimeout(ctx, &p.wg, 2*time.Second)
289	err := p.runtime.Delete(ctx, p.id, nil)
290	// ignore errors if a runtime has already deleted the process
291	// but we still hold metadata and pipes
292	//
293	// this is common during a checkpoint, runc will delete the container state
294	// after a checkpoint and the container will no longer exist within runc
295	if err != nil {
296		if strings.Contains(err.Error(), "does not exist") {
297			err = nil
298		} else {
299			err = p.runtimeError(err, "failed to delete task")
300		}
301	}
302	if p.io != nil {
303		for _, c := range p.closers {
304			c.Close()
305		}
306		p.io.Close()
307	}
308	if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
309		log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
310		if err == nil {
311			err = errors.Wrap(err2, "failed rootfs umount")
312		}
313	}
314	return err
315}
316
317// Resize the init processes console
318func (p *Init) Resize(ws console.WinSize) error {
319	p.mu.Lock()
320	defer p.mu.Unlock()
321
322	if p.console == nil {
323		return nil
324	}
325	return p.console.Resize(ws)
326}
327
328// Pause the init process and all its child processes
329func (p *Init) Pause(ctx context.Context) error {
330	p.mu.Lock()
331	defer p.mu.Unlock()
332
333	return p.initState.Pause(ctx)
334}
335
336// Resume the init process and all its child processes
337func (p *Init) Resume(ctx context.Context) error {
338	p.mu.Lock()
339	defer p.mu.Unlock()
340
341	return p.initState.Resume(ctx)
342}
343
344// Kill the init process
345func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
346	p.mu.Lock()
347	defer p.mu.Unlock()
348
349	return p.initState.Kill(ctx, signal, all)
350}
351
352func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
353	err := p.runtime.Kill(ctx, p.id, int(signal), &runc.KillOpts{
354		All: all,
355	})
356	return checkKillError(err)
357}
358
359// KillAll processes belonging to the init process
360func (p *Init) KillAll(ctx context.Context) error {
361	p.mu.Lock()
362	defer p.mu.Unlock()
363
364	err := p.runtime.Kill(ctx, p.id, int(unix.SIGKILL), &runc.KillOpts{
365		All: true,
366	})
367	return p.runtimeError(err, "OCI runtime killall failed")
368}
369
370// Stdin of the process
371func (p *Init) Stdin() io.Closer {
372	return p.stdin
373}
374
375// Runtime returns the OCI runtime configured for the init process
376func (p *Init) Runtime() *runc.Runc {
377	return p.runtime
378}
379
380// Exec returns a new child process
381func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
382	p.mu.Lock()
383	defer p.mu.Unlock()
384
385	return p.initState.Exec(ctx, path, r)
386}
387
388// exec returns a new exec'd process
389func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
390	// process exec request
391	var spec specs.Process
392	if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
393		return nil, err
394	}
395	spec.Terminal = r.Terminal
396
397	e := &execProcess{
398		id:     r.ID,
399		path:   path,
400		parent: p,
401		spec:   spec,
402		stdio: stdio.Stdio{
403			Stdin:    r.Stdin,
404			Stdout:   r.Stdout,
405			Stderr:   r.Stderr,
406			Terminal: r.Terminal,
407		},
408		waitBlock: make(chan struct{}),
409	}
410	e.execState = &execCreatedState{p: e}
411	return e, nil
412}
413
414// Checkpoint the init process
415func (p *Init) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
416	p.mu.Lock()
417	defer p.mu.Unlock()
418
419	return p.initState.Checkpoint(ctx, r)
420}
421
422func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
423	var actions []runc.CheckpointAction
424	if !r.Exit {
425		actions = append(actions, runc.LeaveRunning)
426	}
427	// keep criu work directory if criu work dir is set
428	work := r.WorkDir
429	if work == "" {
430		work = filepath.Join(p.WorkDir, "criu-work")
431		defer os.RemoveAll(work)
432	}
433	if err := p.runtime.Checkpoint(ctx, p.id, &runc.CheckpointOpts{
434		WorkDir:                  work,
435		ImagePath:                r.Path,
436		AllowOpenTCP:             r.AllowOpenTCP,
437		AllowExternalUnixSockets: r.AllowExternalUnixSockets,
438		AllowTerminal:            r.AllowTerminal,
439		FileLocks:                r.FileLocks,
440		EmptyNamespaces:          r.EmptyNamespaces,
441	}, actions...); err != nil {
442		dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
443		if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
444			log.G(ctx).Error(err)
445		}
446		return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
447	}
448	return nil
449}
450
451// Update the processes resource configuration
452func (p *Init) Update(ctx context.Context, r *google_protobuf.Any) error {
453	p.mu.Lock()
454	defer p.mu.Unlock()
455
456	return p.initState.Update(ctx, r)
457}
458
459func (p *Init) update(ctx context.Context, r *google_protobuf.Any) error {
460	var resources specs.LinuxResources
461	if err := json.Unmarshal(r.Value, &resources); err != nil {
462		return err
463	}
464	return p.runtime.Update(ctx, p.id, &resources)
465}
466
467// Stdio of the process
468func (p *Init) Stdio() stdio.Stdio {
469	return p.stdio
470}
471
472func (p *Init) runtimeError(rErr error, msg string) error {
473	if rErr == nil {
474		return nil
475	}
476
477	rMsg, err := getLastRuntimeError(p.runtime)
478	switch {
479	case err != nil:
480		return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error())
481	case rMsg == "":
482		return errors.Wrap(rErr, msg)
483	default:
484		return errors.Errorf("%s: %s", msg, rMsg)
485	}
486}
487
488func withConditionalIO(c stdio.Stdio) runc.IOOpt {
489	return func(o *runc.IOOption) {
490		o.OpenStdin = c.Stdin != ""
491		o.OpenStdout = c.Stdout != ""
492		o.OpenStderr = c.Stderr != ""
493	}
494}
495