1package hcs
2
3import (
4	"context"
5	"encoding/json"
6	"errors"
7	"strings"
8	"sync"
9	"syscall"
10
11	"github.com/Microsoft/hcsshim/internal/cow"
12	"github.com/Microsoft/hcsshim/internal/log"
13	"github.com/Microsoft/hcsshim/internal/oc"
14	"github.com/Microsoft/hcsshim/internal/schema1"
15	hcsschema "github.com/Microsoft/hcsshim/internal/schema2"
16	"github.com/Microsoft/hcsshim/internal/timeout"
17	"github.com/Microsoft/hcsshim/internal/vmcompute"
18	"go.opencensus.io/trace"
19)
20
21type System struct {
22	handleLock     sync.RWMutex
23	handle         vmcompute.HcsSystem
24	id             string
25	callbackNumber uintptr
26
27	closedWaitOnce sync.Once
28	waitBlock      chan struct{}
29	waitError      error
30	exitError      error
31
32	os, typ string
33}
34
35func newSystem(id string) *System {
36	return &System{
37		id:        id,
38		waitBlock: make(chan struct{}),
39	}
40}
41
42// CreateComputeSystem creates a new compute system with the given configuration but does not start it.
43func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) {
44	operation := "hcsshim::CreateComputeSystem"
45
46	// hcsCreateComputeSystemContext is an async operation. Start the outer span
47	// here to measure the full create time.
48	ctx, span := trace.StartSpan(ctx, operation)
49	defer span.End()
50	defer func() { oc.SetSpanStatus(span, err) }()
51	span.AddAttributes(trace.StringAttribute("cid", id))
52
53	computeSystem := newSystem(id)
54
55	hcsDocumentB, err := json.Marshal(hcsDocumentInterface)
56	if err != nil {
57		return nil, err
58	}
59
60	hcsDocument := string(hcsDocumentB)
61
62	var (
63		identity    syscall.Handle
64		resultJSON  string
65		createError error
66	)
67	computeSystem.handle, resultJSON, createError = vmcompute.HcsCreateComputeSystem(ctx, id, hcsDocument, identity)
68	if createError == nil || IsPending(createError) {
69		defer func() {
70			if err != nil {
71				computeSystem.Close()
72			}
73		}()
74		if err = computeSystem.registerCallback(ctx); err != nil {
75			// Terminate the compute system if it still exists. We're okay to
76			// ignore a failure here.
77			computeSystem.Terminate(ctx)
78			return nil, makeSystemError(computeSystem, operation, "", err, nil)
79		}
80	}
81
82	events, err := processAsyncHcsResult(ctx, createError, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.SystemCreate)
83	if err != nil {
84		if err == ErrTimeout {
85			// Terminate the compute system if it still exists. We're okay to
86			// ignore a failure here.
87			computeSystem.Terminate(ctx)
88		}
89		return nil, makeSystemError(computeSystem, operation, hcsDocument, err, events)
90	}
91	go computeSystem.waitBackground()
92	if err = computeSystem.getCachedProperties(ctx); err != nil {
93		return nil, err
94	}
95	return computeSystem, nil
96}
97
98// OpenComputeSystem opens an existing compute system by ID.
99func OpenComputeSystem(ctx context.Context, id string) (*System, error) {
100	operation := "hcsshim::OpenComputeSystem"
101
102	computeSystem := newSystem(id)
103	handle, resultJSON, err := vmcompute.HcsOpenComputeSystem(ctx, id)
104	events := processHcsResult(ctx, resultJSON)
105	if err != nil {
106		return nil, makeSystemError(computeSystem, operation, "", err, events)
107	}
108	computeSystem.handle = handle
109	defer func() {
110		if err != nil {
111			computeSystem.Close()
112		}
113	}()
114	if err = computeSystem.registerCallback(ctx); err != nil {
115		return nil, makeSystemError(computeSystem, operation, "", err, nil)
116	}
117	go computeSystem.waitBackground()
118	if err = computeSystem.getCachedProperties(ctx); err != nil {
119		return nil, err
120	}
121	return computeSystem, nil
122}
123
124func (computeSystem *System) getCachedProperties(ctx context.Context) error {
125	props, err := computeSystem.Properties(ctx)
126	if err != nil {
127		return err
128	}
129	computeSystem.typ = strings.ToLower(props.SystemType)
130	computeSystem.os = strings.ToLower(props.RuntimeOSType)
131	if computeSystem.os == "" && computeSystem.typ == "container" {
132		// Pre-RS5 HCS did not return the OS, but it only supported containers
133		// that ran Windows.
134		computeSystem.os = "windows"
135	}
136	return nil
137}
138
139// OS returns the operating system of the compute system, "linux" or "windows".
140func (computeSystem *System) OS() string {
141	return computeSystem.os
142}
143
144// IsOCI returns whether processes in the compute system should be created via
145// OCI.
146func (computeSystem *System) IsOCI() bool {
147	return computeSystem.os == "linux" && computeSystem.typ == "container"
148}
149
150// GetComputeSystems gets a list of the compute systems on the system that match the query
151func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) {
152	operation := "hcsshim::GetComputeSystems"
153
154	queryb, err := json.Marshal(q)
155	if err != nil {
156		return nil, err
157	}
158
159	computeSystemsJSON, resultJSON, err := vmcompute.HcsEnumerateComputeSystems(ctx, string(queryb))
160	events := processHcsResult(ctx, resultJSON)
161	if err != nil {
162		return nil, &HcsError{Op: operation, Err: err, Events: events}
163	}
164
165	if computeSystemsJSON == "" {
166		return nil, ErrUnexpectedValue
167	}
168	computeSystems := []schema1.ContainerProperties{}
169	if err = json.Unmarshal([]byte(computeSystemsJSON), &computeSystems); err != nil {
170		return nil, err
171	}
172
173	return computeSystems, nil
174}
175
176// Start synchronously starts the computeSystem.
177func (computeSystem *System) Start(ctx context.Context) (err error) {
178	operation := "hcsshim::System::Start"
179
180	// hcsStartComputeSystemContext is an async operation. Start the outer span
181	// here to measure the full start time.
182	ctx, span := trace.StartSpan(ctx, operation)
183	defer span.End()
184	defer func() { oc.SetSpanStatus(span, err) }()
185	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
186
187	computeSystem.handleLock.RLock()
188	defer computeSystem.handleLock.RUnlock()
189
190	if computeSystem.handle == 0 {
191		return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
192	}
193
194	resultJSON, err := vmcompute.HcsStartComputeSystem(ctx, computeSystem.handle, "")
195	events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.SystemStart)
196	if err != nil {
197		return makeSystemError(computeSystem, operation, "", err, events)
198	}
199
200	return nil
201}
202
203// ID returns the compute system's identifier.
204func (computeSystem *System) ID() string {
205	return computeSystem.id
206}
207
208// Shutdown requests a compute system shutdown.
209func (computeSystem *System) Shutdown(ctx context.Context) error {
210	computeSystem.handleLock.RLock()
211	defer computeSystem.handleLock.RUnlock()
212
213	operation := "hcsshim::System::Shutdown"
214
215	if computeSystem.handle == 0 {
216		return nil
217	}
218
219	resultJSON, err := vmcompute.HcsShutdownComputeSystem(ctx, computeSystem.handle, "")
220	events := processHcsResult(ctx, resultJSON)
221	switch err {
222	case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
223	default:
224		return makeSystemError(computeSystem, operation, "", err, events)
225	}
226	return nil
227}
228
229// Terminate requests a compute system terminate.
230func (computeSystem *System) Terminate(ctx context.Context) error {
231	computeSystem.handleLock.RLock()
232	defer computeSystem.handleLock.RUnlock()
233
234	operation := "hcsshim::System::Terminate"
235
236	if computeSystem.handle == 0 {
237		return nil
238	}
239
240	resultJSON, err := vmcompute.HcsTerminateComputeSystem(ctx, computeSystem.handle, "")
241	events := processHcsResult(ctx, resultJSON)
242	switch err {
243	case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
244	default:
245		return makeSystemError(computeSystem, operation, "", err, events)
246	}
247	return nil
248}
249
250// waitBackground waits for the compute system exit notification. Once received
251// sets `computeSystem.waitError` (if any) and unblocks all `Wait` calls.
252//
253// This MUST be called exactly once per `computeSystem.handle` but `Wait` is
254// safe to call multiple times.
255func (computeSystem *System) waitBackground() {
256	operation := "hcsshim::System::waitBackground"
257	ctx, span := trace.StartSpan(context.Background(), operation)
258	defer span.End()
259	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
260
261	err := waitForNotification(ctx, computeSystem.callbackNumber, hcsNotificationSystemExited, nil)
262	switch err {
263	case nil:
264		log.G(ctx).Debug("system exited")
265	case ErrVmcomputeUnexpectedExit:
266		log.G(ctx).Debug("unexpected system exit")
267		computeSystem.exitError = makeSystemError(computeSystem, operation, "", err, nil)
268		err = nil
269	default:
270		err = makeSystemError(computeSystem, operation, "", err, nil)
271	}
272	computeSystem.closedWaitOnce.Do(func() {
273		computeSystem.waitError = err
274		close(computeSystem.waitBlock)
275	})
276	oc.SetSpanStatus(span, err)
277}
278
279// Wait synchronously waits for the compute system to shutdown or terminate. If
280// the compute system has already exited returns the previous error (if any).
281func (computeSystem *System) Wait() error {
282	<-computeSystem.waitBlock
283	return computeSystem.waitError
284}
285
286// ExitError returns an error describing the reason the compute system terminated.
287func (computeSystem *System) ExitError() error {
288	select {
289	case <-computeSystem.waitBlock:
290		if computeSystem.waitError != nil {
291			return computeSystem.waitError
292		}
293		return computeSystem.exitError
294	default:
295		return errors.New("container not exited")
296	}
297}
298
299// Properties returns the requested container properties targeting a V1 schema container.
300func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) {
301	computeSystem.handleLock.RLock()
302	defer computeSystem.handleLock.RUnlock()
303
304	operation := "hcsshim::System::Properties"
305
306	queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types})
307	if err != nil {
308		return nil, makeSystemError(computeSystem, operation, "", err, nil)
309	}
310
311	propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes))
312	events := processHcsResult(ctx, resultJSON)
313	if err != nil {
314		return nil, makeSystemError(computeSystem, operation, "", err, events)
315	}
316
317	if propertiesJSON == "" {
318		return nil, ErrUnexpectedValue
319	}
320	properties := &schema1.ContainerProperties{}
321	if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
322		return nil, makeSystemError(computeSystem, operation, "", err, nil)
323	}
324
325	return properties, nil
326}
327
328// PropertiesV2 returns the requested container properties targeting a V2 schema container.
329func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) {
330	computeSystem.handleLock.RLock()
331	defer computeSystem.handleLock.RUnlock()
332
333	operation := "hcsshim::System::PropertiesV2"
334
335	queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types})
336	if err != nil {
337		return nil, makeSystemError(computeSystem, operation, "", err, nil)
338	}
339
340	propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes))
341	events := processHcsResult(ctx, resultJSON)
342	if err != nil {
343		return nil, makeSystemError(computeSystem, operation, "", err, events)
344	}
345
346	if propertiesJSON == "" {
347		return nil, ErrUnexpectedValue
348	}
349	properties := &hcsschema.Properties{}
350	if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
351		return nil, makeSystemError(computeSystem, operation, "", err, nil)
352	}
353
354	return properties, nil
355}
356
357// Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
358func (computeSystem *System) Pause(ctx context.Context) (err error) {
359	operation := "hcsshim::System::Pause"
360
361	// hcsPauseComputeSystemContext is an async peration. Start the outer span
362	// here to measure the full pause time.
363	ctx, span := trace.StartSpan(ctx, operation)
364	defer span.End()
365	defer func() { oc.SetSpanStatus(span, err) }()
366	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
367
368	computeSystem.handleLock.RLock()
369	defer computeSystem.handleLock.RUnlock()
370
371	if computeSystem.handle == 0 {
372		return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
373	}
374
375	resultJSON, err := vmcompute.HcsPauseComputeSystem(ctx, computeSystem.handle, "")
376	events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.SystemPause)
377	if err != nil {
378		return makeSystemError(computeSystem, operation, "", err, events)
379	}
380
381	return nil
382}
383
384// Resume resumes the execution of the computeSystem. This feature is not enabled in TP5.
385func (computeSystem *System) Resume(ctx context.Context) (err error) {
386	operation := "hcsshim::System::Resume"
387
388	// hcsResumeComputeSystemContext is an async operation. Start the outer span
389	// here to measure the full restore time.
390	ctx, span := trace.StartSpan(ctx, operation)
391	defer span.End()
392	defer func() { oc.SetSpanStatus(span, err) }()
393	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
394
395	computeSystem.handleLock.RLock()
396	defer computeSystem.handleLock.RUnlock()
397
398	if computeSystem.handle == 0 {
399		return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
400	}
401
402	resultJSON, err := vmcompute.HcsResumeComputeSystem(ctx, computeSystem.handle, "")
403	events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.SystemResume)
404	if err != nil {
405		return makeSystemError(computeSystem, operation, "", err, events)
406	}
407
408	return nil
409}
410
411func (computeSystem *System) createProcess(ctx context.Context, operation string, c interface{}) (*Process, *vmcompute.HcsProcessInformation, error) {
412	computeSystem.handleLock.RLock()
413	defer computeSystem.handleLock.RUnlock()
414
415	if computeSystem.handle == 0 {
416		return nil, nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
417	}
418
419	configurationb, err := json.Marshal(c)
420	if err != nil {
421		return nil, nil, makeSystemError(computeSystem, operation, "", err, nil)
422	}
423
424	configuration := string(configurationb)
425	processInfo, processHandle, resultJSON, err := vmcompute.HcsCreateProcess(ctx, computeSystem.handle, configuration)
426	events := processHcsResult(ctx, resultJSON)
427	if err != nil {
428		return nil, nil, makeSystemError(computeSystem, operation, configuration, err, events)
429	}
430
431	log.G(ctx).WithField("pid", processInfo.ProcessId).Debug("created process pid")
432	return newProcess(processHandle, int(processInfo.ProcessId), computeSystem), &processInfo, nil
433}
434
435// CreateProcess launches a new process within the computeSystem.
436func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) {
437	operation := "hcsshim::System::CreateProcess"
438	process, processInfo, err := computeSystem.createProcess(ctx, operation, c)
439	if err != nil {
440		return nil, err
441	}
442	defer func() {
443		if err != nil {
444			process.Close()
445		}
446	}()
447
448	pipes, err := makeOpenFiles([]syscall.Handle{processInfo.StdInput, processInfo.StdOutput, processInfo.StdError})
449	if err != nil {
450		return nil, makeSystemError(computeSystem, operation, "", err, nil)
451	}
452	process.stdin = pipes[0]
453	process.stdout = pipes[1]
454	process.stderr = pipes[2]
455	process.hasCachedStdio = true
456
457	if err = process.registerCallback(ctx); err != nil {
458		return nil, makeSystemError(computeSystem, operation, "", err, nil)
459	}
460	go process.waitBackground()
461
462	return process, nil
463}
464
465// OpenProcess gets an interface to an existing process within the computeSystem.
466func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) {
467	computeSystem.handleLock.RLock()
468	defer computeSystem.handleLock.RUnlock()
469
470	operation := "hcsshim::System::OpenProcess"
471
472	if computeSystem.handle == 0 {
473		return nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
474	}
475
476	processHandle, resultJSON, err := vmcompute.HcsOpenProcess(ctx, computeSystem.handle, uint32(pid))
477	events := processHcsResult(ctx, resultJSON)
478	if err != nil {
479		return nil, makeSystemError(computeSystem, operation, "", err, events)
480	}
481
482	process := newProcess(processHandle, pid, computeSystem)
483	if err = process.registerCallback(ctx); err != nil {
484		return nil, makeSystemError(computeSystem, operation, "", err, nil)
485	}
486	go process.waitBackground()
487
488	return process, nil
489}
490
491// Close cleans up any state associated with the compute system but does not terminate or wait for it.
492func (computeSystem *System) Close() (err error) {
493	operation := "hcsshim::System::Close"
494	ctx, span := trace.StartSpan(context.Background(), operation)
495	defer span.End()
496	defer func() { oc.SetSpanStatus(span, err) }()
497	span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
498
499	computeSystem.handleLock.Lock()
500	defer computeSystem.handleLock.Unlock()
501
502	// Don't double free this
503	if computeSystem.handle == 0 {
504		return nil
505	}
506
507	if err = computeSystem.unregisterCallback(ctx); err != nil {
508		return makeSystemError(computeSystem, operation, "", err, nil)
509	}
510
511	err = vmcompute.HcsCloseComputeSystem(ctx, computeSystem.handle)
512	if err != nil {
513		return makeSystemError(computeSystem, operation, "", err, nil)
514	}
515
516	computeSystem.handle = 0
517	computeSystem.closedWaitOnce.Do(func() {
518		computeSystem.waitError = ErrAlreadyClosed
519		close(computeSystem.waitBlock)
520	})
521
522	return nil
523}
524
525func (computeSystem *System) registerCallback(ctx context.Context) error {
526	callbackContext := &notifcationWatcherContext{
527		channels: newSystemChannels(),
528		systemID: computeSystem.id,
529	}
530
531	callbackMapLock.Lock()
532	callbackNumber := nextCallback
533	nextCallback++
534	callbackMap[callbackNumber] = callbackContext
535	callbackMapLock.Unlock()
536
537	callbackHandle, err := vmcompute.HcsRegisterComputeSystemCallback(ctx, computeSystem.handle, notificationWatcherCallback, callbackNumber)
538	if err != nil {
539		return err
540	}
541	callbackContext.handle = callbackHandle
542	computeSystem.callbackNumber = callbackNumber
543
544	return nil
545}
546
547func (computeSystem *System) unregisterCallback(ctx context.Context) error {
548	callbackNumber := computeSystem.callbackNumber
549
550	callbackMapLock.RLock()
551	callbackContext := callbackMap[callbackNumber]
552	callbackMapLock.RUnlock()
553
554	if callbackContext == nil {
555		return nil
556	}
557
558	handle := callbackContext.handle
559
560	if handle == 0 {
561		return nil
562	}
563
564	// hcsUnregisterComputeSystemCallback has its own syncronization
565	// to wait for all callbacks to complete. We must NOT hold the callbackMapLock.
566	err := vmcompute.HcsUnregisterComputeSystemCallback(ctx, handle)
567	if err != nil {
568		return err
569	}
570
571	closeChannels(callbackContext.channels)
572
573	callbackMapLock.Lock()
574	delete(callbackMap, callbackNumber)
575	callbackMapLock.Unlock()
576
577	handle = 0
578
579	return nil
580}
581
582// Modify the System by sending a request to HCS
583func (computeSystem *System) Modify(ctx context.Context, config interface{}) error {
584	computeSystem.handleLock.RLock()
585	defer computeSystem.handleLock.RUnlock()
586
587	operation := "hcsshim::System::Modify"
588
589	if computeSystem.handle == 0 {
590		return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
591	}
592
593	requestBytes, err := json.Marshal(config)
594	if err != nil {
595		return err
596	}
597
598	requestJSON := string(requestBytes)
599	resultJSON, err := vmcompute.HcsModifyComputeSystem(ctx, computeSystem.handle, requestJSON)
600	events := processHcsResult(ctx, resultJSON)
601	if err != nil {
602		return makeSystemError(computeSystem, operation, requestJSON, err, events)
603	}
604
605	return nil
606}
607