1package hcs 2 3import ( 4 "context" 5 "encoding/json" 6 "errors" 7 "strings" 8 "sync" 9 "syscall" 10 11 "github.com/Microsoft/hcsshim/internal/cow" 12 "github.com/Microsoft/hcsshim/internal/log" 13 "github.com/Microsoft/hcsshim/internal/oc" 14 "github.com/Microsoft/hcsshim/internal/schema1" 15 hcsschema "github.com/Microsoft/hcsshim/internal/schema2" 16 "github.com/Microsoft/hcsshim/internal/timeout" 17 "github.com/Microsoft/hcsshim/internal/vmcompute" 18 "go.opencensus.io/trace" 19) 20 21type System struct { 22 handleLock sync.RWMutex 23 handle vmcompute.HcsSystem 24 id string 25 callbackNumber uintptr 26 27 closedWaitOnce sync.Once 28 waitBlock chan struct{} 29 waitError error 30 exitError error 31 32 os, typ string 33} 34 35func newSystem(id string) *System { 36 return &System{ 37 id: id, 38 waitBlock: make(chan struct{}), 39 } 40} 41 42// CreateComputeSystem creates a new compute system with the given configuration but does not start it. 43func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) { 44 operation := "hcsshim::CreateComputeSystem" 45 46 // hcsCreateComputeSystemContext is an async operation. Start the outer span 47 // here to measure the full create time. 48 ctx, span := trace.StartSpan(ctx, operation) 49 defer span.End() 50 defer func() { oc.SetSpanStatus(span, err) }() 51 span.AddAttributes(trace.StringAttribute("cid", id)) 52 53 computeSystem := newSystem(id) 54 55 hcsDocumentB, err := json.Marshal(hcsDocumentInterface) 56 if err != nil { 57 return nil, err 58 } 59 60 hcsDocument := string(hcsDocumentB) 61 62 var ( 63 identity syscall.Handle 64 resultJSON string 65 createError error 66 ) 67 computeSystem.handle, resultJSON, createError = vmcompute.HcsCreateComputeSystem(ctx, id, hcsDocument, identity) 68 if createError == nil || IsPending(createError) { 69 defer func() { 70 if err != nil { 71 computeSystem.Close() 72 } 73 }() 74 if err = computeSystem.registerCallback(ctx); err != nil { 75 // Terminate the compute system if it still exists. We're okay to 76 // ignore a failure here. 77 computeSystem.Terminate(ctx) 78 return nil, makeSystemError(computeSystem, operation, "", err, nil) 79 } 80 } 81 82 events, err := processAsyncHcsResult(ctx, createError, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.SystemCreate) 83 if err != nil { 84 if err == ErrTimeout { 85 // Terminate the compute system if it still exists. We're okay to 86 // ignore a failure here. 87 computeSystem.Terminate(ctx) 88 } 89 return nil, makeSystemError(computeSystem, operation, hcsDocument, err, events) 90 } 91 go computeSystem.waitBackground() 92 if err = computeSystem.getCachedProperties(ctx); err != nil { 93 return nil, err 94 } 95 return computeSystem, nil 96} 97 98// OpenComputeSystem opens an existing compute system by ID. 99func OpenComputeSystem(ctx context.Context, id string) (*System, error) { 100 operation := "hcsshim::OpenComputeSystem" 101 102 computeSystem := newSystem(id) 103 handle, resultJSON, err := vmcompute.HcsOpenComputeSystem(ctx, id) 104 events := processHcsResult(ctx, resultJSON) 105 if err != nil { 106 return nil, makeSystemError(computeSystem, operation, "", err, events) 107 } 108 computeSystem.handle = handle 109 defer func() { 110 if err != nil { 111 computeSystem.Close() 112 } 113 }() 114 if err = computeSystem.registerCallback(ctx); err != nil { 115 return nil, makeSystemError(computeSystem, operation, "", err, nil) 116 } 117 go computeSystem.waitBackground() 118 if err = computeSystem.getCachedProperties(ctx); err != nil { 119 return nil, err 120 } 121 return computeSystem, nil 122} 123 124func (computeSystem *System) getCachedProperties(ctx context.Context) error { 125 props, err := computeSystem.Properties(ctx) 126 if err != nil { 127 return err 128 } 129 computeSystem.typ = strings.ToLower(props.SystemType) 130 computeSystem.os = strings.ToLower(props.RuntimeOSType) 131 if computeSystem.os == "" && computeSystem.typ == "container" { 132 // Pre-RS5 HCS did not return the OS, but it only supported containers 133 // that ran Windows. 134 computeSystem.os = "windows" 135 } 136 return nil 137} 138 139// OS returns the operating system of the compute system, "linux" or "windows". 140func (computeSystem *System) OS() string { 141 return computeSystem.os 142} 143 144// IsOCI returns whether processes in the compute system should be created via 145// OCI. 146func (computeSystem *System) IsOCI() bool { 147 return computeSystem.os == "linux" && computeSystem.typ == "container" 148} 149 150// GetComputeSystems gets a list of the compute systems on the system that match the query 151func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) { 152 operation := "hcsshim::GetComputeSystems" 153 154 queryb, err := json.Marshal(q) 155 if err != nil { 156 return nil, err 157 } 158 159 computeSystemsJSON, resultJSON, err := vmcompute.HcsEnumerateComputeSystems(ctx, string(queryb)) 160 events := processHcsResult(ctx, resultJSON) 161 if err != nil { 162 return nil, &HcsError{Op: operation, Err: err, Events: events} 163 } 164 165 if computeSystemsJSON == "" { 166 return nil, ErrUnexpectedValue 167 } 168 computeSystems := []schema1.ContainerProperties{} 169 if err = json.Unmarshal([]byte(computeSystemsJSON), &computeSystems); err != nil { 170 return nil, err 171 } 172 173 return computeSystems, nil 174} 175 176// Start synchronously starts the computeSystem. 177func (computeSystem *System) Start(ctx context.Context) (err error) { 178 operation := "hcsshim::System::Start" 179 180 // hcsStartComputeSystemContext is an async operation. Start the outer span 181 // here to measure the full start time. 182 ctx, span := trace.StartSpan(ctx, operation) 183 defer span.End() 184 defer func() { oc.SetSpanStatus(span, err) }() 185 span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) 186 187 computeSystem.handleLock.RLock() 188 defer computeSystem.handleLock.RUnlock() 189 190 if computeSystem.handle == 0 { 191 return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) 192 } 193 194 resultJSON, err := vmcompute.HcsStartComputeSystem(ctx, computeSystem.handle, "") 195 events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.SystemStart) 196 if err != nil { 197 return makeSystemError(computeSystem, operation, "", err, events) 198 } 199 200 return nil 201} 202 203// ID returns the compute system's identifier. 204func (computeSystem *System) ID() string { 205 return computeSystem.id 206} 207 208// Shutdown requests a compute system shutdown. 209func (computeSystem *System) Shutdown(ctx context.Context) error { 210 computeSystem.handleLock.RLock() 211 defer computeSystem.handleLock.RUnlock() 212 213 operation := "hcsshim::System::Shutdown" 214 215 if computeSystem.handle == 0 { 216 return nil 217 } 218 219 resultJSON, err := vmcompute.HcsShutdownComputeSystem(ctx, computeSystem.handle, "") 220 events := processHcsResult(ctx, resultJSON) 221 switch err { 222 case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending: 223 default: 224 return makeSystemError(computeSystem, operation, "", err, events) 225 } 226 return nil 227} 228 229// Terminate requests a compute system terminate. 230func (computeSystem *System) Terminate(ctx context.Context) error { 231 computeSystem.handleLock.RLock() 232 defer computeSystem.handleLock.RUnlock() 233 234 operation := "hcsshim::System::Terminate" 235 236 if computeSystem.handle == 0 { 237 return nil 238 } 239 240 resultJSON, err := vmcompute.HcsTerminateComputeSystem(ctx, computeSystem.handle, "") 241 events := processHcsResult(ctx, resultJSON) 242 switch err { 243 case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending: 244 default: 245 return makeSystemError(computeSystem, operation, "", err, events) 246 } 247 return nil 248} 249 250// waitBackground waits for the compute system exit notification. Once received 251// sets `computeSystem.waitError` (if any) and unblocks all `Wait` calls. 252// 253// This MUST be called exactly once per `computeSystem.handle` but `Wait` is 254// safe to call multiple times. 255func (computeSystem *System) waitBackground() { 256 operation := "hcsshim::System::waitBackground" 257 ctx, span := trace.StartSpan(context.Background(), operation) 258 defer span.End() 259 span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) 260 261 err := waitForNotification(ctx, computeSystem.callbackNumber, hcsNotificationSystemExited, nil) 262 switch err { 263 case nil: 264 log.G(ctx).Debug("system exited") 265 case ErrVmcomputeUnexpectedExit: 266 log.G(ctx).Debug("unexpected system exit") 267 computeSystem.exitError = makeSystemError(computeSystem, operation, "", err, nil) 268 err = nil 269 default: 270 err = makeSystemError(computeSystem, operation, "", err, nil) 271 } 272 computeSystem.closedWaitOnce.Do(func() { 273 computeSystem.waitError = err 274 close(computeSystem.waitBlock) 275 }) 276 oc.SetSpanStatus(span, err) 277} 278 279// Wait synchronously waits for the compute system to shutdown or terminate. If 280// the compute system has already exited returns the previous error (if any). 281func (computeSystem *System) Wait() error { 282 <-computeSystem.waitBlock 283 return computeSystem.waitError 284} 285 286// ExitError returns an error describing the reason the compute system terminated. 287func (computeSystem *System) ExitError() error { 288 select { 289 case <-computeSystem.waitBlock: 290 if computeSystem.waitError != nil { 291 return computeSystem.waitError 292 } 293 return computeSystem.exitError 294 default: 295 return errors.New("container not exited") 296 } 297} 298 299// Properties returns the requested container properties targeting a V1 schema container. 300func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) { 301 computeSystem.handleLock.RLock() 302 defer computeSystem.handleLock.RUnlock() 303 304 operation := "hcsshim::System::Properties" 305 306 queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types}) 307 if err != nil { 308 return nil, makeSystemError(computeSystem, operation, "", err, nil) 309 } 310 311 propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes)) 312 events := processHcsResult(ctx, resultJSON) 313 if err != nil { 314 return nil, makeSystemError(computeSystem, operation, "", err, events) 315 } 316 317 if propertiesJSON == "" { 318 return nil, ErrUnexpectedValue 319 } 320 properties := &schema1.ContainerProperties{} 321 if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { 322 return nil, makeSystemError(computeSystem, operation, "", err, nil) 323 } 324 325 return properties, nil 326} 327 328// PropertiesV2 returns the requested container properties targeting a V2 schema container. 329func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { 330 computeSystem.handleLock.RLock() 331 defer computeSystem.handleLock.RUnlock() 332 333 operation := "hcsshim::System::PropertiesV2" 334 335 queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types}) 336 if err != nil { 337 return nil, makeSystemError(computeSystem, operation, "", err, nil) 338 } 339 340 propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes)) 341 events := processHcsResult(ctx, resultJSON) 342 if err != nil { 343 return nil, makeSystemError(computeSystem, operation, "", err, events) 344 } 345 346 if propertiesJSON == "" { 347 return nil, ErrUnexpectedValue 348 } 349 properties := &hcsschema.Properties{} 350 if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { 351 return nil, makeSystemError(computeSystem, operation, "", err, nil) 352 } 353 354 return properties, nil 355} 356 357// Pause pauses the execution of the computeSystem. This feature is not enabled in TP5. 358func (computeSystem *System) Pause(ctx context.Context) (err error) { 359 operation := "hcsshim::System::Pause" 360 361 // hcsPauseComputeSystemContext is an async peration. Start the outer span 362 // here to measure the full pause time. 363 ctx, span := trace.StartSpan(ctx, operation) 364 defer span.End() 365 defer func() { oc.SetSpanStatus(span, err) }() 366 span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) 367 368 computeSystem.handleLock.RLock() 369 defer computeSystem.handleLock.RUnlock() 370 371 if computeSystem.handle == 0 { 372 return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) 373 } 374 375 resultJSON, err := vmcompute.HcsPauseComputeSystem(ctx, computeSystem.handle, "") 376 events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.SystemPause) 377 if err != nil { 378 return makeSystemError(computeSystem, operation, "", err, events) 379 } 380 381 return nil 382} 383 384// Resume resumes the execution of the computeSystem. This feature is not enabled in TP5. 385func (computeSystem *System) Resume(ctx context.Context) (err error) { 386 operation := "hcsshim::System::Resume" 387 388 // hcsResumeComputeSystemContext is an async operation. Start the outer span 389 // here to measure the full restore time. 390 ctx, span := trace.StartSpan(ctx, operation) 391 defer span.End() 392 defer func() { oc.SetSpanStatus(span, err) }() 393 span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) 394 395 computeSystem.handleLock.RLock() 396 defer computeSystem.handleLock.RUnlock() 397 398 if computeSystem.handle == 0 { 399 return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) 400 } 401 402 resultJSON, err := vmcompute.HcsResumeComputeSystem(ctx, computeSystem.handle, "") 403 events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.SystemResume) 404 if err != nil { 405 return makeSystemError(computeSystem, operation, "", err, events) 406 } 407 408 return nil 409} 410 411func (computeSystem *System) createProcess(ctx context.Context, operation string, c interface{}) (*Process, *vmcompute.HcsProcessInformation, error) { 412 computeSystem.handleLock.RLock() 413 defer computeSystem.handleLock.RUnlock() 414 415 if computeSystem.handle == 0 { 416 return nil, nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) 417 } 418 419 configurationb, err := json.Marshal(c) 420 if err != nil { 421 return nil, nil, makeSystemError(computeSystem, operation, "", err, nil) 422 } 423 424 configuration := string(configurationb) 425 processInfo, processHandle, resultJSON, err := vmcompute.HcsCreateProcess(ctx, computeSystem.handle, configuration) 426 events := processHcsResult(ctx, resultJSON) 427 if err != nil { 428 return nil, nil, makeSystemError(computeSystem, operation, configuration, err, events) 429 } 430 431 log.G(ctx).WithField("pid", processInfo.ProcessId).Debug("created process pid") 432 return newProcess(processHandle, int(processInfo.ProcessId), computeSystem), &processInfo, nil 433} 434 435// CreateProcess launches a new process within the computeSystem. 436func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) { 437 operation := "hcsshim::System::CreateProcess" 438 process, processInfo, err := computeSystem.createProcess(ctx, operation, c) 439 if err != nil { 440 return nil, err 441 } 442 defer func() { 443 if err != nil { 444 process.Close() 445 } 446 }() 447 448 pipes, err := makeOpenFiles([]syscall.Handle{processInfo.StdInput, processInfo.StdOutput, processInfo.StdError}) 449 if err != nil { 450 return nil, makeSystemError(computeSystem, operation, "", err, nil) 451 } 452 process.stdin = pipes[0] 453 process.stdout = pipes[1] 454 process.stderr = pipes[2] 455 process.hasCachedStdio = true 456 457 if err = process.registerCallback(ctx); err != nil { 458 return nil, makeSystemError(computeSystem, operation, "", err, nil) 459 } 460 go process.waitBackground() 461 462 return process, nil 463} 464 465// OpenProcess gets an interface to an existing process within the computeSystem. 466func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) { 467 computeSystem.handleLock.RLock() 468 defer computeSystem.handleLock.RUnlock() 469 470 operation := "hcsshim::System::OpenProcess" 471 472 if computeSystem.handle == 0 { 473 return nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) 474 } 475 476 processHandle, resultJSON, err := vmcompute.HcsOpenProcess(ctx, computeSystem.handle, uint32(pid)) 477 events := processHcsResult(ctx, resultJSON) 478 if err != nil { 479 return nil, makeSystemError(computeSystem, operation, "", err, events) 480 } 481 482 process := newProcess(processHandle, pid, computeSystem) 483 if err = process.registerCallback(ctx); err != nil { 484 return nil, makeSystemError(computeSystem, operation, "", err, nil) 485 } 486 go process.waitBackground() 487 488 return process, nil 489} 490 491// Close cleans up any state associated with the compute system but does not terminate or wait for it. 492func (computeSystem *System) Close() (err error) { 493 operation := "hcsshim::System::Close" 494 ctx, span := trace.StartSpan(context.Background(), operation) 495 defer span.End() 496 defer func() { oc.SetSpanStatus(span, err) }() 497 span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) 498 499 computeSystem.handleLock.Lock() 500 defer computeSystem.handleLock.Unlock() 501 502 // Don't double free this 503 if computeSystem.handle == 0 { 504 return nil 505 } 506 507 if err = computeSystem.unregisterCallback(ctx); err != nil { 508 return makeSystemError(computeSystem, operation, "", err, nil) 509 } 510 511 err = vmcompute.HcsCloseComputeSystem(ctx, computeSystem.handle) 512 if err != nil { 513 return makeSystemError(computeSystem, operation, "", err, nil) 514 } 515 516 computeSystem.handle = 0 517 computeSystem.closedWaitOnce.Do(func() { 518 computeSystem.waitError = ErrAlreadyClosed 519 close(computeSystem.waitBlock) 520 }) 521 522 return nil 523} 524 525func (computeSystem *System) registerCallback(ctx context.Context) error { 526 callbackContext := ¬ifcationWatcherContext{ 527 channels: newSystemChannels(), 528 systemID: computeSystem.id, 529 } 530 531 callbackMapLock.Lock() 532 callbackNumber := nextCallback 533 nextCallback++ 534 callbackMap[callbackNumber] = callbackContext 535 callbackMapLock.Unlock() 536 537 callbackHandle, err := vmcompute.HcsRegisterComputeSystemCallback(ctx, computeSystem.handle, notificationWatcherCallback, callbackNumber) 538 if err != nil { 539 return err 540 } 541 callbackContext.handle = callbackHandle 542 computeSystem.callbackNumber = callbackNumber 543 544 return nil 545} 546 547func (computeSystem *System) unregisterCallback(ctx context.Context) error { 548 callbackNumber := computeSystem.callbackNumber 549 550 callbackMapLock.RLock() 551 callbackContext := callbackMap[callbackNumber] 552 callbackMapLock.RUnlock() 553 554 if callbackContext == nil { 555 return nil 556 } 557 558 handle := callbackContext.handle 559 560 if handle == 0 { 561 return nil 562 } 563 564 // hcsUnregisterComputeSystemCallback has its own syncronization 565 // to wait for all callbacks to complete. We must NOT hold the callbackMapLock. 566 err := vmcompute.HcsUnregisterComputeSystemCallback(ctx, handle) 567 if err != nil { 568 return err 569 } 570 571 closeChannels(callbackContext.channels) 572 573 callbackMapLock.Lock() 574 delete(callbackMap, callbackNumber) 575 callbackMapLock.Unlock() 576 577 handle = 0 578 579 return nil 580} 581 582// Modify the System by sending a request to HCS 583func (computeSystem *System) Modify(ctx context.Context, config interface{}) error { 584 computeSystem.handleLock.RLock() 585 defer computeSystem.handleLock.RUnlock() 586 587 operation := "hcsshim::System::Modify" 588 589 if computeSystem.handle == 0 { 590 return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) 591 } 592 593 requestBytes, err := json.Marshal(config) 594 if err != nil { 595 return err 596 } 597 598 requestJSON := string(requestBytes) 599 resultJSON, err := vmcompute.HcsModifyComputeSystem(ctx, computeSystem.handle, requestJSON) 600 events := processHcsResult(ctx, resultJSON) 601 if err != nil { 602 return makeSystemError(computeSystem, operation, requestJSON, err, events) 603 } 604 605 return nil 606} 607