1package main
2
3import (
4	"context"
5	"fmt"
6	"os"
7	"path/filepath"
8	"sync"
9	"time"
10
11	eventstypes "github.com/containerd/containerd/api/events"
12	"github.com/containerd/containerd/errdefs"
13	"github.com/containerd/containerd/runtime"
14	"github.com/containerd/containerd/runtime/v2/task"
15	"github.com/containerd/typeurl"
16	specs "github.com/opencontainers/runtime-spec/specs-go"
17	"github.com/pkg/errors"
18	"github.com/sirupsen/logrus"
19	"go.opencensus.io/trace"
20
21	"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
22	runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
23	"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
24	"github.com/Microsoft/hcsshim/internal/cmd"
25	"github.com/Microsoft/hcsshim/internal/cow"
26	"github.com/Microsoft/hcsshim/internal/hcs"
27	"github.com/Microsoft/hcsshim/internal/hcsoci"
28	"github.com/Microsoft/hcsshim/internal/log"
29	"github.com/Microsoft/hcsshim/internal/oci"
30	"github.com/Microsoft/hcsshim/internal/resources"
31	"github.com/Microsoft/hcsshim/internal/schema1"
32	hcsschema "github.com/Microsoft/hcsshim/internal/schema2"
33	"github.com/Microsoft/hcsshim/internal/shimdiag"
34	"github.com/Microsoft/hcsshim/internal/uvm"
35	"github.com/Microsoft/hcsshim/osversion"
36)
37
38func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (shimTask, error) {
39	log.G(ctx).WithField("tid", req.ID).Debug("newHcsStandaloneTask")
40
41	ct, _, err := oci.GetSandboxTypeAndID(s.Annotations)
42	if err != nil {
43		return nil, err
44	}
45	if ct != oci.KubernetesContainerTypeNone {
46		return nil, errors.Wrapf(
47			errdefs.ErrFailedPrecondition,
48			"cannot create standalone task, expected no annotation: '%s': got '%s'",
49			oci.KubernetesContainerTypeAnnotation,
50			ct)
51	}
52
53	owner := filepath.Base(os.Args[0])
54
55	var parent *uvm.UtilityVM
56	if osversion.Get().Build >= osversion.RS5 && oci.IsIsolated(s) {
57		// Create the UVM parent
58		opts, err := oci.SpecToUVMCreateOpts(ctx, s, fmt.Sprintf("%s@vm", req.ID), owner)
59		if err != nil {
60			return nil, err
61		}
62		switch opts.(type) {
63		case *uvm.OptionsLCOW:
64			lopts := (opts).(*uvm.OptionsLCOW)
65			parent, err = uvm.CreateLCOW(ctx, lopts)
66			if err != nil {
67				return nil, err
68			}
69		case *uvm.OptionsWCOW:
70			wopts := (opts).(*uvm.OptionsWCOW)
71
72			// In order for the UVM sandbox.vhdx not to collide with the actual
73			// nested Argon sandbox.vhdx we append the \vm folder to the last
74			// entry in the list.
75			layersLen := len(s.Windows.LayerFolders)
76			layers := make([]string, layersLen)
77			copy(layers, s.Windows.LayerFolders)
78
79			vmPath := filepath.Join(layers[layersLen-1], "vm")
80			err := os.MkdirAll(vmPath, 0)
81			if err != nil {
82				return nil, err
83			}
84			layers[layersLen-1] = vmPath
85			wopts.LayerFolders = layers
86
87			parent, err = uvm.CreateWCOW(ctx, wopts)
88			if err != nil {
89				return nil, err
90			}
91		}
92		err = parent.Start(ctx)
93		if err != nil {
94			parent.Close()
95		}
96	} else if !oci.IsWCOW(s) {
97		return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "oci spec does not contain WCOW or LCOW spec")
98	}
99
100	shim, err := newHcsTask(ctx, events, parent, true, req, s)
101	if err != nil {
102		if parent != nil {
103			parent.Close()
104		}
105		return nil, err
106	}
107	return shim, nil
108}
109
110// newHcsTask creates a container within `parent` and its init exec process in
111// the `shimExecCreated` state and returns the task that tracks its lifetime.
112//
113// If `parent == nil` the container is created on the host.
114func newHcsTask(
115	ctx context.Context,
116	events publisher,
117	parent *uvm.UtilityVM,
118	ownsParent bool,
119	req *task.CreateTaskRequest,
120	s *specs.Spec) (_ shimTask, err error) {
121	log.G(ctx).WithFields(logrus.Fields{
122		"tid":        req.ID,
123		"ownsParent": ownsParent,
124	}).Debug("newHcsTask")
125
126	owner := filepath.Base(os.Args[0])
127	isTemplate := oci.ParseAnnotationsSaveAsTemplate(ctx, s)
128
129	io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal)
130	if err != nil {
131		return nil, err
132	}
133
134	var netNS string
135	if s.Windows != nil &&
136		s.Windows.Network != nil {
137		netNS = s.Windows.Network.NetworkNamespace
138	}
139
140	var shimOpts *runhcsopts.Options
141	if req.Options != nil {
142		v, err := typeurl.UnmarshalAny(req.Options)
143		if err != nil {
144			return nil, err
145		}
146		shimOpts = v.(*runhcsopts.Options)
147	}
148
149	opts := hcsoci.CreateOptions{
150		ID:               req.ID,
151		Owner:            owner,
152		Spec:             s,
153		HostingSystem:    parent,
154		NetworkNamespace: netNS,
155	}
156
157	if shimOpts != nil {
158		opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox
159	}
160
161	system, resources, err := hcsoci.CreateContainer(ctx, &opts)
162	if err != nil {
163		return nil, err
164	}
165
166	ht := &hcsTask{
167		events:     events,
168		id:         req.ID,
169		isWCOW:     oci.IsWCOW(s),
170		c:          system,
171		cr:         resources,
172		ownsHost:   ownsParent,
173		host:       parent,
174		closed:     make(chan struct{}),
175		taskSpec:   s,
176		isTemplate: isTemplate,
177	}
178	ht.init = newHcsExec(
179		ctx,
180		events,
181		req.ID,
182		parent,
183		system,
184		req.ID,
185		req.Bundle,
186		ht.isWCOW,
187		s.Process,
188		io,
189	)
190
191	if parent != nil {
192		// We have a parent UVM. Listen for its exit and forcibly close this
193		// task. This is not expected but in the event of a UVM crash we need to
194		// handle this case.
195		go ht.waitForHostExit()
196	}
197
198	// In the normal case the `Signal` call from the caller killed this task's
199	// init process. Or the init process ran to completion - this will mostly
200	// happen when we are creating a template and want to wait for init process
201	// to finish before we save the template. In such cases do not tear down the
202	// container after init exits - because we need the container in the template
203	go ht.waitInitExit(!isTemplate)
204
205	// Publish the created event
206	ht.events.publishEvent(
207		ctx,
208		runtime.TaskCreateEventTopic,
209		&eventstypes.TaskCreate{
210			ContainerID: req.ID,
211			Bundle:      req.Bundle,
212			Rootfs:      req.Rootfs,
213			IO: &eventstypes.TaskIO{
214				Stdin:    req.Stdin,
215				Stdout:   req.Stdout,
216				Stderr:   req.Stderr,
217				Terminal: req.Terminal,
218			},
219			Checkpoint: "",
220			Pid:        uint32(ht.init.Pid()),
221		})
222	return ht, nil
223}
224
225// newClonedTask creates a container within `parent`. The parent must be already cloned
226// from a template and hence this container must already be present inside that parent.
227// This function simply creates the go wrapper around the container that is already
228// running inside the cloned parent.
229// This task MAY own the UVM that it is running in but as of now the cloning feature is
230// only used for WCOW hyper-V isolated containers and for WCOW, the wcowPodSandboxTask
231// owns that UVM.
232func newClonedHcsTask(
233	ctx context.Context,
234	events publisher,
235	parent *uvm.UtilityVM,
236	ownsParent bool,
237	req *task.CreateTaskRequest,
238	s *specs.Spec,
239	templateID string) (_ shimTask, err error) {
240	log.G(ctx).WithFields(logrus.Fields{
241		"tid":        req.ID,
242		"ownsParent": ownsParent,
243		"templateid": templateID,
244	}).Debug("newClonedHcsTask")
245
246	owner := filepath.Base(os.Args[0])
247
248	if parent.OS() != "windows" {
249		return nil, fmt.Errorf("cloned task can only be created inside a windows host")
250	}
251
252	io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal)
253	if err != nil {
254		return nil, err
255	}
256
257	var netNS string
258	if s.Windows != nil &&
259		s.Windows.Network != nil {
260		netNS = s.Windows.Network.NetworkNamespace
261	}
262
263	// This is a cloned task. Use the templateid as the ID of the container here
264	// because that's the ID of this container inside the UVM.
265	opts := hcsoci.CreateOptions{
266		ID:               templateID,
267		Owner:            owner,
268		Spec:             s,
269		HostingSystem:    parent,
270		NetworkNamespace: netNS,
271	}
272	system, resources, err := hcsoci.CloneContainer(ctx, &opts)
273	if err != nil {
274		return nil, err
275	}
276
277	ht := &hcsTask{
278		events:     events,
279		id:         req.ID,
280		isWCOW:     oci.IsWCOW(s),
281		c:          system,
282		cr:         resources,
283		ownsHost:   ownsParent,
284		host:       parent,
285		closed:     make(chan struct{}),
286		templateID: templateID,
287		taskSpec:   s,
288		isTemplate: false,
289	}
290	ht.init = newClonedExec(
291		ctx,
292		events,
293		req.ID,
294		parent,
295		system,
296		req.ID,
297		req.Bundle,
298		ht.isWCOW,
299		s.Process,
300		io)
301
302	if parent != nil {
303		// We have a parent UVM. Listen for its exit and forcibly close this
304		// task. This is not expected but in the event of a UVM crash we need to
305		// handle this case.
306		go ht.waitForHostExit()
307	}
308
309	// In the normal case the `Signal` call from the caller killed this task's
310	// init process.
311	go ht.waitInitExit(true)
312
313	// Publish the created event
314	ht.events.publishEvent(
315		ctx,
316		runtime.TaskCreateEventTopic,
317		&eventstypes.TaskCreate{
318			ContainerID: req.ID,
319			Bundle:      req.Bundle,
320			Rootfs:      req.Rootfs,
321			IO: &eventstypes.TaskIO{
322				Stdin:    req.Stdin,
323				Stdout:   req.Stdout,
324				Stderr:   req.Stderr,
325				Terminal: req.Terminal,
326			},
327			Checkpoint: "",
328			Pid:        uint32(ht.init.Pid()),
329		})
330	return ht, nil
331}
332
// Compile-time check that *hcsTask satisfies the shimTask interface.
var _ = (shimTask)(&hcsTask{})

// hcsTask is a generic task that represents a WCOW Container (process or
// hypervisor isolated), or a LCOW Container. This task MAY own the UVM the
// container is in but in the case of a POD it may just track the UVM for
// container lifetime management. In the case of ownership when the init
// task/exec is stopped the UVM itself will be stopped as well.
type hcsTask struct {
	// events is the publisher used to emit task and exec lifecycle events.
	events publisher
	// id is the id of this task when it is created.
	//
	// It MUST be treated as read only in the lifetime of the task.
	id string
	// isWCOW is set to `true` if this is a task representing a Windows container.
	//
	// It MUST be treated as read only in the lifetime of the task.
	isWCOW bool
	// c is the container backing this task.
	//
	// It MUST be treated as read only in the lifetime of this task EXCEPT after
	// a Kill to the init task in which it must be shutdown.
	c cow.Container
	// cr is the container resources this task is holding.
	//
	// It MUST be treated as read only in the lifetime of this task EXCEPT after
	// a Kill to the init task in which all resources must be released.
	cr *resources.Resources
	// init is the init process of the container.
	//
	// Note: the invariant `container state == init.State()` MUST be true. IE:
	// if the init process exits the container as a whole and all exec's MUST
	// exit.
	//
	// It MUST be treated as read only in the lifetime of the task.
	init shimExec
	// ownsHost is `true` if this task owns `host`. If so when this task's init
	// exec shuts down it is required that `host` be shut down as well.
	ownsHost bool
	// host is the hosting VM for this exec if hypervisor isolated. If
	// `host==nil` this is an Argon task so no UVM cleanup is required.
	//
	// NOTE: if `osversion.Get().Build < osversion.RS5` this will always be
	// `nil`.
	host *uvm.UtilityVM

	// ecl is the exec create lock for all non-init execs and MUST be held
	// during create to prevent ID duplication.
	ecl   sync.Mutex
	// execs holds the additional (non-init) execs of this task, keyed by exec
	// id with `shimExec` values.
	execs sync.Map

	// closed is closed by `closeHost` after the init exit event has been
	// published; `Wait` blocks on it.
	closed    chan struct{}
	// closeOnce ensures the container teardown in `close` runs at most once.
	closeOnce sync.Once
	// closeHostOnce is used to close `host`. This will only be used if
	// `ownsHost==true` and `host != nil`.
	closeHostOnce sync.Once

	// templateID represents the id of the template container from which this container
	// is cloned. The parent UVM (inside which this container is running) identifies this
	// container with its original id (i.e the id that was assigned to this container
	// at the time of template creation i.e the templateID). Hence, every request that
	// is sent to the GCS must actually use templateID to reference this container.
	// A non-empty templateID specifies that this task was cloned.
	templateID string

	// if isTemplate is true then this container will be saved as a template as soon
	// as its init process exits. Note: templateID and isTemplate are mutually exclusive.
	// i.e isTemplate can not be true when templateID is not empty.
	isTemplate bool

	// taskSpec represents the spec/configuration for this task.
	taskSpec *specs.Spec
}
405
// ID returns the id this task was created with.
func (ht *hcsTask) ID() string {
	return ht.id
}
409
410func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest, spec *specs.Process) error {
411	ht.ecl.Lock()
412	defer ht.ecl.Unlock()
413
414	// If the task exists or we got a request for "" which is the init task
415	// fail.
416	if _, loaded := ht.execs.Load(req.ExecID); loaded || req.ExecID == "" {
417		return errors.Wrapf(errdefs.ErrAlreadyExists, "exec: '%s' in task: '%s' already exists", req.ExecID, ht.id)
418	}
419
420	if ht.init.State() != shimExecStateRunning {
421		return errors.Wrapf(errdefs.ErrFailedPrecondition, "exec: '' in task: '%s' must be running to create additional execs", ht.id)
422	}
423
424	io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal)
425	if err != nil {
426		return err
427	}
428
429	he := newHcsExec(
430		ctx,
431		ht.events,
432		ht.id,
433		ht.host,
434		ht.c,
435		req.ExecID,
436		ht.init.Status().Bundle,
437		ht.isWCOW,
438		spec,
439		io,
440	)
441
442	ht.execs.Store(req.ExecID, he)
443
444	// Publish the created event
445	ht.events.publishEvent(
446		ctx,
447		runtime.TaskExecAddedEventTopic,
448		&eventstypes.TaskExecAdded{
449			ContainerID: ht.id,
450			ExecID:      req.ExecID,
451		})
452
453	return nil
454}
455
456func (ht *hcsTask) GetExec(eid string) (shimExec, error) {
457	if eid == "" {
458		return ht.init, nil
459	}
460	raw, loaded := ht.execs.Load(eid)
461	if !loaded {
462		return nil, errors.Wrapf(errdefs.ErrNotFound, "exec: '%s' in task: '%s' not found", eid, ht.id)
463	}
464	return raw.(shimExec), nil
465}
466
467func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all bool) error {
468	e, err := ht.GetExec(eid)
469	if err != nil {
470		return err
471	}
472	if all && eid != "" {
473		return errors.Wrapf(errdefs.ErrFailedPrecondition, "cannot signal all for non-empty exec: '%s'", eid)
474	}
475	if all {
476		// We are in a kill all on the init task. Signal everything.
477		ht.execs.Range(func(key, value interface{}) bool {
478			err := value.(shimExec).Kill(ctx, signal)
479			if err != nil {
480				log.G(ctx).WithFields(logrus.Fields{
481					"eid":           key,
482					logrus.ErrorKey: err,
483				}).Warn("failed to kill exec in task")
484			}
485
486			// iterate all
487			return false
488		})
489	}
490	if signal == 0x9 && eid == "" && ht.host != nil {
491		// If this is a SIGKILL against the init process we start a background
492		// timer and wait on either the timer expiring or the process exiting
493		// cleanly. If the timer exires first we forcibly close the UVM as we
494		// assume the guest is misbehaving for some reason.
495		go func() {
496			t := time.NewTimer(30 * time.Second)
497			execExited := make(chan struct{})
498			go func() {
499				e.Wait()
500				close(execExited)
501			}()
502			select {
503			case <-execExited:
504				t.Stop()
505			case <-t.C:
506				// Safe to call multiple times if called previously on
507				// successful shutdown.
508				ht.host.Close()
509			}
510		}()
511	}
512	return e.Kill(ctx, signal)
513}
514
515func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, time.Time, error) {
516	e, err := ht.GetExec(eid)
517	if err != nil {
518		return 0, 0, time.Time{}, err
519	}
520	if eid == "" {
521		// We are deleting the init exec. Forcibly exit any additional exec's.
522		ht.execs.Range(func(key, value interface{}) bool {
523			ex := value.(shimExec)
524			if s := ex.State(); s != shimExecStateExited {
525				ex.ForceExit(ctx, 1)
526			}
527
528			// iterate next
529			return false
530		})
531	}
532	switch state := e.State(); state {
533	case shimExecStateCreated:
534		e.ForceExit(ctx, 0)
535	case shimExecStateRunning:
536		return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete")
537	}
538	status := e.Status()
539	if eid != "" {
540		ht.execs.Delete(eid)
541	}
542
543	// Publish the deleted event
544	ht.events.publishEvent(
545		ctx,
546		runtime.TaskDeleteEventTopic,
547		&eventstypes.TaskDelete{
548			ContainerID: ht.id,
549			ID:          eid,
550			Pid:         status.Pid,
551			ExitStatus:  status.ExitStatus,
552			ExitedAt:    status.ExitedAt,
553		})
554
555	return int(status.Pid), status.ExitStatus, status.ExitedAt, nil
556}
557
558func (ht *hcsTask) Pids(ctx context.Context) ([]options.ProcessDetails, error) {
559	// Map all user created exec's to pid/exec-id
560	pidMap := make(map[int]string)
561	ht.execs.Range(func(key, value interface{}) bool {
562		ex := value.(shimExec)
563		pidMap[ex.Pid()] = ex.ID()
564
565		// Iterate all
566		return false
567	})
568	pidMap[ht.init.Pid()] = ht.init.ID()
569
570	// Get the guest pids
571	props, err := ht.c.Properties(ctx, schema1.PropertyTypeProcessList)
572	if err != nil {
573		return nil, err
574	}
575
576	// Copy to pid/exec-id pair's
577	pairs := make([]options.ProcessDetails, len(props.ProcessList))
578	for i, p := range props.ProcessList {
579		pairs[i].ImageName = p.ImageName
580		pairs[i].CreatedAt = p.CreateTimestamp
581		pairs[i].KernelTime_100Ns = p.KernelTime100ns
582		pairs[i].MemoryCommitBytes = p.MemoryCommitBytes
583		pairs[i].MemoryWorkingSetPrivateBytes = p.MemoryWorkingSetPrivateBytes
584		pairs[i].MemoryWorkingSetSharedBytes = p.MemoryWorkingSetSharedBytes
585		pairs[i].ProcessID = p.ProcessId
586		pairs[i].UserTime_100Ns = p.KernelTime100ns
587
588		if eid, ok := pidMap[int(p.ProcessId)]; ok {
589			pairs[i].ExecID = eid
590		}
591	}
592	return pairs, nil
593}
594
// Wait blocks until this task's `closed` channel has been closed and then
// returns the result of waiting on the init exec.
func (ht *hcsTask) Wait() *task.StateResponse {
	// `closed` is closed by closeHost after the exit event has been published.
	<-ht.closed
	return ht.init.Wait()
}
599
600func (ht *hcsTask) waitInitExit(destroyContainer bool) {
601	ctx, span := trace.StartSpan(context.Background(), "hcsTask::waitInitExit")
602	defer span.End()
603	span.AddAttributes(trace.StringAttribute("tid", ht.id))
604
605	// Wait for it to exit on its own
606	ht.init.Wait()
607
608	if destroyContainer {
609		// Close the host and event the exit
610		ht.close(ctx)
611	} else {
612		// Close the container's host, but do not close or terminate the container itself
613		ht.closeHost(ctx)
614	}
615
616	if ht.isTemplate {
617		// Save the host as a template
618		if err := saveAsTemplate(ctx, ht); err != nil {
619			log.G(ctx).WithError(err).Error("failed to save as template")
620		}
621	}
622}
623
624// waitForHostExit waits for the host virtual machine to exit. Once exited
625// forcibly exits all additional exec's in this task.
626//
627// This MUST be called via a goroutine to wait on a background thread.
628//
629// Note: For Windows process isolated containers there is no host virtual
630// machine so this should not be called.
631func (ht *hcsTask) waitForHostExit() {
632	ctx, span := trace.StartSpan(context.Background(), "hcsTask::waitForHostExit")
633	defer span.End()
634	span.AddAttributes(trace.StringAttribute("tid", ht.id))
635
636	err := ht.host.Wait()
637	if err != nil {
638		log.G(ctx).WithError(err).Error("failed to wait for host virtual machine exit")
639	} else {
640		log.G(ctx).Debug("host virtual machine exited")
641	}
642
643	ht.execs.Range(func(key, value interface{}) bool {
644		ex := value.(shimExec)
645		ex.ForceExit(ctx, 1)
646
647		// iterate all
648		return false
649	})
650	ht.init.ForceExit(ctx, 1)
651	ht.closeHost(ctx)
652}
653
654// close shuts down the container that is owned by this task and if
655// `ht.ownsHost` will shutdown the hosting VM the container was placed in.
656//
657// NOTE: For Windows process isolated containers `ht.ownsHost==true && ht.host
658// == nil`.
659func (ht *hcsTask) close(ctx context.Context) {
660	ht.closeOnce.Do(func() {
661		log.G(ctx).Debug("hcsTask::closeOnce")
662
663		// ht.c should never be nil for a real task but in testing we stub
664		// this to avoid a nil dereference. We really should introduce a
665		// method or interface for ht.c operations that we can stub for
666		// testing.
667		if ht.c != nil {
668			// Do our best attempt to tear down the container.
669			var werr error
670			ch := make(chan struct{})
671			go func() {
672				werr = ht.c.Wait()
673				close(ch)
674			}()
675			err := ht.c.Shutdown(ctx)
676			if err != nil {
677				log.G(ctx).WithError(err).Error("failed to shutdown container")
678			} else {
679				t := time.NewTimer(time.Second * 30)
680				select {
681				case <-ch:
682					err = werr
683					t.Stop()
684					if err != nil {
685						log.G(ctx).WithError(err).Error("failed to wait for container shutdown")
686					}
687				case <-t.C:
688					log.G(ctx).WithError(hcs.ErrTimeout).Error("failed to wait for container shutdown")
689				}
690			}
691
692			if err != nil {
693				err = ht.c.Terminate(ctx)
694				if err != nil {
695					log.G(ctx).WithError(err).Error("failed to terminate container")
696				} else {
697					t := time.NewTimer(time.Second * 30)
698					select {
699					case <-ch:
700						err = werr
701						t.Stop()
702						if err != nil {
703							log.G(ctx).WithError(err).Error("failed to wait for container terminate")
704						}
705					case <-t.C:
706						log.G(ctx).WithError(hcs.ErrTimeout).Error("failed to wait for container terminate")
707					}
708				}
709			}
710
711			// Release any resources associated with the container.
712			if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil {
713				log.G(ctx).WithError(err).Error("failed to release container resources")
714			}
715
716			// Close the container handle invalidating all future access.
717			if err := ht.c.Close(); err != nil {
718				log.G(ctx).WithError(err).Error("failed to close container")
719			}
720		}
721		ht.closeHost(ctx)
722	})
723}
724
725// closeHost safely closes the hosting UVM if this task is the owner. Once
726// closed and all resources released it events the `runtime.TaskExitEventTopic`
727// for all upstream listeners.
728//
729// Note: If this is a process isolated task the hosting UVM is simply a `noop`.
730//
731// This call is idempotent and safe to call multiple times.
732func (ht *hcsTask) closeHost(ctx context.Context) {
733	ht.closeHostOnce.Do(func() {
734		log.G(ctx).Debug("hcsTask::closeHostOnce")
735
736		if ht.ownsHost && ht.host != nil {
737			if err := ht.host.Close(); err != nil {
738				log.G(ctx).WithError(err).Error("failed host vm shutdown")
739			}
740		}
741		// Send the `init` exec exit notification always.
742		exit := ht.init.Status()
743		ht.events.publishEvent(
744			ctx,
745			runtime.TaskExitEventTopic,
746			&eventstypes.TaskExit{
747				ContainerID: ht.id,
748				ID:          exit.ID,
749				Pid:         uint32(exit.Pid),
750				ExitStatus:  exit.ExitStatus,
751				ExitedAt:    exit.ExitedAt,
752			})
753		close(ht.closed)
754	})
755}
756
757func (ht *hcsTask) ExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequest) (int, error) {
758	if ht.host == nil {
759		return cmd.ExecInShimHost(ctx, req)
760	}
761	return cmd.ExecInUvm(ctx, ht.host, req)
762}
763
764func (ht *hcsTask) DumpGuestStacks(ctx context.Context) string {
765	if ht.host != nil {
766		stacks, err := ht.host.DumpStacks(ctx)
767		if err != nil {
768			log.G(ctx).WithError(err).Warn("failed to capture guest stacks")
769		} else {
770			return stacks
771		}
772	}
773	return ""
774}
775
776func (ht *hcsTask) Share(ctx context.Context, req *shimdiag.ShareRequest) error {
777	if ht.host == nil {
778		return errTaskNotIsolated
779	}
780	// For hyper-v isolated WCOW the task used isn't the standard hcsTask so we
781	// only have to deal with the LCOW case here.
782	st, err := os.Stat(req.HostPath)
783	if err != nil {
784		return fmt.Errorf("could not open '%s' path on host: %s", req.HostPath, err)
785	}
786	var (
787		hostPath       string = req.HostPath
788		restrictAccess bool
789		fileName       string
790		allowedNames   []string
791	)
792	if !st.IsDir() {
793		hostPath, fileName = filepath.Split(hostPath)
794		allowedNames = append(allowedNames, fileName)
795		restrictAccess = true
796	}
797	_, err = ht.host.AddPlan9(ctx, hostPath, req.UvmPath, req.ReadOnly, restrictAccess, allowedNames)
798	return err
799}
800
801func hcsPropertiesToWindowsStats(props *hcsschema.Properties) *stats.Statistics_Windows {
802	wcs := &stats.Statistics_Windows{Windows: &stats.WindowsContainerStatistics{}}
803	if props.Statistics != nil {
804		wcs.Windows.Timestamp = props.Statistics.Timestamp
805		wcs.Windows.ContainerStartTime = props.Statistics.ContainerStartTime
806		wcs.Windows.UptimeNS = props.Statistics.Uptime100ns * 100
807		if props.Statistics.Processor != nil {
808			wcs.Windows.Processor = &stats.WindowsContainerProcessorStatistics{
809				TotalRuntimeNS:  props.Statistics.Processor.TotalRuntime100ns * 100,
810				RuntimeUserNS:   props.Statistics.Processor.RuntimeUser100ns * 100,
811				RuntimeKernelNS: props.Statistics.Processor.RuntimeKernel100ns * 100,
812			}
813		}
814		if props.Statistics.Memory != nil {
815			wcs.Windows.Memory = &stats.WindowsContainerMemoryStatistics{
816				MemoryUsageCommitBytes:            props.Statistics.Memory.MemoryUsageCommitBytes,
817				MemoryUsageCommitPeakBytes:        props.Statistics.Memory.MemoryUsageCommitPeakBytes,
818				MemoryUsagePrivateWorkingSetBytes: props.Statistics.Memory.MemoryUsagePrivateWorkingSetBytes,
819			}
820		}
821		if props.Statistics.Storage != nil {
822			wcs.Windows.Storage = &stats.WindowsContainerStorageStatistics{
823				ReadCountNormalized:  props.Statistics.Storage.ReadCountNormalized,
824				ReadSizeBytes:        props.Statistics.Storage.ReadSizeBytes,
825				WriteCountNormalized: props.Statistics.Storage.WriteCountNormalized,
826				WriteSizeBytes:       props.Statistics.Storage.WriteSizeBytes,
827			}
828		}
829	}
830	return wcs
831}
832
833func (ht *hcsTask) Stats(ctx context.Context) (*stats.Statistics, error) {
834	s := &stats.Statistics{}
835	props, err := ht.c.PropertiesV2(ctx, hcsschema.PTStatistics)
836	if err != nil && !isStatsNotFound(err) {
837		return nil, err
838	}
839	if props != nil {
840		if ht.isWCOW {
841			s.Container = hcsPropertiesToWindowsStats(props)
842		} else {
843			s.Container = &stats.Statistics_Linux{Linux: props.Metrics}
844		}
845	}
846	if ht.ownsHost && ht.host != nil {
847		vmStats, err := ht.host.Stats(ctx)
848		if err != nil && !isStatsNotFound(err) {
849			return nil, err
850		}
851		s.VM = vmStats
852	}
853	return s, nil
854}
855