1// +build !dockerless 2 3/* 4Copyright 2016 The Kubernetes Authors. 5 6Licensed under the Apache License, Version 2.0 (the "License"); 7you may not use this file except in compliance with the License. 8You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12Unless required by applicable law or agreed to in writing, software 13distributed under the License is distributed on an "AS IS" BASIS, 14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15See the License for the specific language governing permissions and 16limitations under the License. 17*/ 18 19package dockershim 20 21import ( 22 "context" 23 "encoding/json" 24 "fmt" 25 "os" 26 "strings" 27 "time" 28 29 dockertypes "github.com/docker/docker/api/types" 30 dockercontainer "github.com/docker/docker/api/types/container" 31 dockerfilters "github.com/docker/docker/api/types/filters" 32 utilerrors "k8s.io/apimachinery/pkg/util/errors" 33 runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" 34 "k8s.io/klog/v2" 35 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" 36 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" 37 kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" 38 "k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker" 39 "k8s.io/kubernetes/pkg/kubelet/types" 40) 41 42const ( 43 defaultSandboxImage = "k8s.gcr.io/pause:3.5" 44 45 // Various default sandbox resources requests/limits. 46 defaultSandboxCPUshares int64 = 2 47 48 // defaultSandboxOOMAdj is the oom score adjustment for the docker 49 // sandbox container. Using this OOM adj makes it very unlikely, but not 50 // impossible, that the defaultSandox will experience an oom kill. -998 51 // is chosen to signify sandbox should be OOM killed before other more 52 // vital processes like the docker daemon, the kubelet, etc... 53 defaultSandboxOOMAdj int = -998 54 55 // Name of the underlying container runtime 56 runtimeName = "docker" 57) 58 59var ( 60 // Termination grace period 61 defaultSandboxGracePeriod = time.Duration(10) * time.Second 62) 63 64// Returns whether the sandbox network is ready, and whether the sandbox is known 65func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) { 66 ds.networkReadyLock.Lock() 67 defer ds.networkReadyLock.Unlock() 68 ready, ok := ds.networkReady[podSandboxID] 69 return ready, ok 70} 71 72func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) { 73 ds.networkReadyLock.Lock() 74 defer ds.networkReadyLock.Unlock() 75 ds.networkReady[podSandboxID] = ready 76} 77 78func (ds *dockerService) clearNetworkReady(podSandboxID string) { 79 ds.networkReadyLock.Lock() 80 defer ds.networkReadyLock.Unlock() 81 delete(ds.networkReady, podSandboxID) 82} 83 84// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure 85// the sandbox is in ready state. 86// For docker, PodSandbox is implemented by a container holding the network 87// namespace for the pod. 88// Note: docker doesn't use LogDirectory (yet). 89func (ds *dockerService) RunPodSandbox(ctx context.Context, r *runtimeapi.RunPodSandboxRequest) (*runtimeapi.RunPodSandboxResponse, error) { 90 config := r.GetConfig() 91 92 // Step 1: Pull the image for the sandbox. 93 image := defaultSandboxImage 94 podSandboxImage := ds.podSandboxImage 95 if len(podSandboxImage) != 0 { 96 image = podSandboxImage 97 } 98 99 // NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly. 100 // see: https://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-registry 101 // Only pull sandbox image when it's not present - v1.PullIfNotPresent. 102 if err := ensureSandboxImageExists(ds.client, image); err != nil { 103 return nil, err 104 } 105 106 // Step 2: Create the sandbox container. 107 if r.GetRuntimeHandler() != "" && r.GetRuntimeHandler() != runtimeName { 108 return nil, fmt.Errorf("RuntimeHandler %q not supported", r.GetRuntimeHandler()) 109 } 110 createConfig, err := ds.makeSandboxDockerConfig(config, image) 111 if err != nil { 112 return nil, fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err) 113 } 114 createResp, err := ds.client.CreateContainer(*createConfig) 115 if err != nil { 116 createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err) 117 } 118 119 if err != nil || createResp == nil { 120 return nil, fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err) 121 } 122 resp := &runtimeapi.RunPodSandboxResponse{PodSandboxId: createResp.ID} 123 124 ds.setNetworkReady(createResp.ID, false) 125 defer func(e *error) { 126 // Set networking ready depending on the error return of 127 // the parent function 128 if *e == nil { 129 ds.setNetworkReady(createResp.ID, true) 130 } 131 }(&err) 132 133 // Step 3: Create Sandbox Checkpoint. 134 if err = ds.checkpointManager.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil { 135 return nil, err 136 } 137 138 // Step 4: Start the sandbox container. 139 // Assume kubelet's garbage collector would remove the sandbox later, if 140 // startContainer failed. 141 err = ds.client.StartContainer(createResp.ID) 142 if err != nil { 143 return nil, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err) 144 } 145 146 // Rewrite resolv.conf file generated by docker. 147 // NOTE: cluster dns settings aren't passed anymore to docker api in all cases, 148 // not only for pods with host network: the resolver conf will be overwritten 149 // after sandbox creation to override docker's behaviour. This resolv.conf 150 // file is shared by all containers of the same pod, and needs to be modified 151 // only once per pod. 152 if dnsConfig := config.GetDnsConfig(); dnsConfig != nil { 153 containerInfo, err := ds.client.InspectContainer(createResp.ID) 154 if err != nil { 155 return nil, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err) 156 } 157 158 if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil { 159 return nil, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err) 160 } 161 } 162 163 // Do not invoke network plugins if in hostNetwork mode. 164 if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE { 165 return resp, nil 166 } 167 168 // Step 5: Setup networking for the sandbox. 169 // All pod networking is setup by a CNI plugin discovered at startup time. 170 // This plugin assigns the pod ip, sets up routes inside the sandbox, 171 // creates interfaces etc. In theory, its jurisdiction ends with pod 172 // sandbox networking, but it might insert iptables rules or open ports 173 // on the host as well, to satisfy parts of the pod spec that aren't 174 // recognized by the CNI standard yet. 175 cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID) 176 networkOptions := make(map[string]string) 177 if dnsConfig := config.GetDnsConfig(); dnsConfig != nil { 178 // Build DNS options. 179 dnsOption, err := json.Marshal(dnsConfig) 180 if err != nil { 181 return nil, fmt.Errorf("failed to marshal dns config for pod %q: %v", config.Metadata.Name, err) 182 } 183 networkOptions["dns"] = string(dnsOption) 184 } 185 err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations, networkOptions) 186 if err != nil { 187 errList := []error{fmt.Errorf("failed to set up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)} 188 189 // Ensure network resources are cleaned up even if the plugin 190 // succeeded but an error happened between that success and here. 191 err = ds.network.TearDownPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID) 192 if err != nil { 193 errList = append(errList, fmt.Errorf("failed to clean up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)) 194 } 195 196 err = ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod) 197 if err != nil { 198 errList = append(errList, fmt.Errorf("failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)) 199 } 200 201 return resp, utilerrors.NewAggregate(errList) 202 } 203 204 return resp, nil 205} 206 207// StopPodSandbox stops the sandbox. If there are any running containers in the 208// sandbox, they should be force terminated. 209// TODO: This function blocks sandbox teardown on networking teardown. Is it 210// better to cut our losses assuming an out of band GC routine will cleanup 211// after us? 212func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopPodSandboxRequest) (*runtimeapi.StopPodSandboxResponse, error) { 213 var namespace, name string 214 var hostNetwork bool 215 216 podSandboxID := r.PodSandboxId 217 resp := &runtimeapi.StopPodSandboxResponse{} 218 219 // Try to retrieve minimal sandbox information from docker daemon or sandbox checkpoint. 220 inspectResult, metadata, statusErr := ds.getPodSandboxDetails(podSandboxID) 221 if statusErr == nil { 222 namespace = metadata.Namespace 223 name = metadata.Name 224 hostNetwork = (networkNamespaceMode(inspectResult) == runtimeapi.NamespaceMode_NODE) 225 } else { 226 checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{}) 227 checkpointErr := ds.checkpointManager.GetCheckpoint(podSandboxID, checkpoint) 228 229 // Proceed if both sandbox container and checkpoint could not be found. This means that following 230 // actions will only have sandbox ID and not have pod namespace and name information. 231 // Return error if encounter any unexpected error. 232 if checkpointErr != nil { 233 if checkpointErr != errors.ErrCheckpointNotFound { 234 err := ds.checkpointManager.RemoveCheckpoint(podSandboxID) 235 if err != nil { 236 klog.ErrorS(err, "Failed to delete corrupt checkpoint for sandbox", "podSandboxID", podSandboxID) 237 } 238 } 239 if libdocker.IsContainerNotFoundError(statusErr) { 240 klog.InfoS("Both sandbox container and checkpoint could not be found. Proceed without further sandbox information.", "podSandboxID", podSandboxID) 241 } else { 242 return nil, utilerrors.NewAggregate([]error{ 243 fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr), 244 fmt.Errorf("failed to get sandbox status: %v", statusErr)}) 245 } 246 } else { 247 _, name, namespace, _, hostNetwork = checkpoint.GetData() 248 } 249 } 250 251 // WARNING: The following operations made the following assumption: 252 // 1. kubelet will retry on any error returned by StopPodSandbox. 253 // 2. tearing down network and stopping sandbox container can succeed in any sequence. 254 // This depends on the implementation detail of network plugin and proper error handling. 255 // For kubenet, if tearing down network failed and sandbox container is stopped, kubelet 256 // will retry. On retry, kubenet will not be able to retrieve network namespace of the sandbox 257 // since it is stopped. With empty network namespace, CNI bridge plugin will conduct best 258 // effort clean up and will not return error. 259 errList := []error{} 260 ready, ok := ds.getNetworkReady(podSandboxID) 261 if !hostNetwork && (ready || !ok) { 262 // Only tear down the pod network if we haven't done so already 263 cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID) 264 err := ds.network.TearDownPod(namespace, name, cID) 265 if err == nil { 266 ds.setNetworkReady(podSandboxID, false) 267 } else { 268 errList = append(errList, err) 269 } 270 } 271 if err := ds.client.StopContainer(podSandboxID, defaultSandboxGracePeriod); err != nil { 272 // Do not return error if the container does not exist 273 if !libdocker.IsContainerNotFoundError(err) { 274 klog.ErrorS(err, "Failed to stop sandbox", "podSandboxID", podSandboxID) 275 errList = append(errList, err) 276 } else { 277 // remove the checkpoint for any sandbox that is not found in the runtime 278 ds.checkpointManager.RemoveCheckpoint(podSandboxID) 279 } 280 } 281 282 if len(errList) == 0 { 283 return resp, nil 284 } 285 286 // TODO: Stop all running containers in the sandbox. 287 return nil, utilerrors.NewAggregate(errList) 288} 289 290// RemovePodSandbox removes the sandbox. If there are running containers in the 291// sandbox, they should be forcibly removed. 292func (ds *dockerService) RemovePodSandbox(ctx context.Context, r *runtimeapi.RemovePodSandboxRequest) (*runtimeapi.RemovePodSandboxResponse, error) { 293 podSandboxID := r.PodSandboxId 294 var errs []error 295 296 opts := dockertypes.ContainerListOptions{All: true} 297 298 opts.Filters = dockerfilters.NewArgs() 299 f := newDockerFilter(&opts.Filters) 300 f.AddLabel(sandboxIDLabelKey, podSandboxID) 301 302 containers, err := ds.client.ListContainers(opts) 303 if err != nil { 304 errs = append(errs, err) 305 } 306 307 // Remove all containers in the sandbox. 308 for i := range containers { 309 if _, err := ds.RemoveContainer(ctx, &runtimeapi.RemoveContainerRequest{ContainerId: containers[i].ID}); err != nil && !libdocker.IsContainerNotFoundError(err) { 310 errs = append(errs, err) 311 } 312 } 313 314 // Remove the sandbox container. 315 err = ds.client.RemoveContainer(podSandboxID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true, Force: true}) 316 if err == nil || libdocker.IsContainerNotFoundError(err) { 317 // Only clear network ready when the sandbox has actually been 318 // removed from docker or doesn't exist 319 ds.clearNetworkReady(podSandboxID) 320 } else { 321 errs = append(errs, err) 322 } 323 324 // Remove the checkpoint of the sandbox. 325 if err := ds.checkpointManager.RemoveCheckpoint(podSandboxID); err != nil { 326 errs = append(errs, err) 327 } 328 if len(errs) == 0 { 329 return &runtimeapi.RemovePodSandboxResponse{}, nil 330 } 331 return nil, utilerrors.NewAggregate(errs) 332} 333 334// getIPsFromPlugin interrogates the network plugin for sandbox IPs. 335func (ds *dockerService) getIPsFromPlugin(sandbox *dockertypes.ContainerJSON) ([]string, error) { 336 metadata, err := parseSandboxName(sandbox.Name) 337 if err != nil { 338 return nil, err 339 } 340 msg := fmt.Sprintf("Couldn't find network status for %s/%s through plugin", metadata.Namespace, metadata.Name) 341 cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID) 342 networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID) 343 if err != nil { 344 return nil, err 345 } 346 if networkStatus == nil { 347 return nil, fmt.Errorf("%v: invalid network status for", msg) 348 } 349 350 ips := make([]string, 0) 351 for _, ip := range networkStatus.IPs { 352 ips = append(ips, ip.String()) 353 } 354 // if we don't have any ip in our list then cni is using classic primary IP only 355 if len(ips) == 0 { 356 ips = append(ips, networkStatus.IP.String()) 357 } 358 return ips, nil 359} 360 361// getIPs returns the ip given the output of `docker inspect` on a pod sandbox, 362// first interrogating any registered plugins, then simply trusting the ip 363// in the sandbox itself. We look for an ipv4 address before ipv6. 364func (ds *dockerService) getIPs(podSandboxID string, sandbox *dockertypes.ContainerJSON) []string { 365 if sandbox.NetworkSettings == nil { 366 return nil 367 } 368 if networkNamespaceMode(sandbox) == runtimeapi.NamespaceMode_NODE { 369 // For sandboxes using host network, the shim is not responsible for 370 // reporting the IP. 371 return nil 372 } 373 374 // Don't bother getting IP if the pod is known and networking isn't ready 375 ready, ok := ds.getNetworkReady(podSandboxID) 376 if ok && !ready { 377 return nil 378 } 379 380 ips, err := ds.getIPsFromPlugin(sandbox) 381 if err == nil { 382 return ips 383 } 384 385 ips = make([]string, 0) 386 // TODO: trusting the docker ip is not a great idea. However docker uses 387 // eth0 by default and so does CNI, so if we find a docker IP here, we 388 // conclude that the plugin must have failed setup, or forgotten its ip. 389 // This is not a sensible assumption for plugins across the board, but if 390 // a plugin doesn't want this behavior, it can throw an error. 391 if sandbox.NetworkSettings.IPAddress != "" { 392 ips = append(ips, sandbox.NetworkSettings.IPAddress) 393 } 394 if sandbox.NetworkSettings.GlobalIPv6Address != "" { 395 ips = append(ips, sandbox.NetworkSettings.GlobalIPv6Address) 396 } 397 398 // If all else fails, warn but don't return an error, as pod status 399 // should generally not return anything except fatal errors 400 // FIXME: handle network errors by restarting the pod somehow? 401 klog.InfoS("Failed to read pod IP from plugin/docker", "err", err) 402 return ips 403} 404 405// Returns the inspect container response, the sandbox metadata, and network namespace mode 406func (ds *dockerService) getPodSandboxDetails(podSandboxID string) (*dockertypes.ContainerJSON, *runtimeapi.PodSandboxMetadata, error) { 407 resp, err := ds.client.InspectContainer(podSandboxID) 408 if err != nil { 409 return nil, nil, err 410 } 411 412 metadata, err := parseSandboxName(resp.Name) 413 if err != nil { 414 return nil, nil, err 415 } 416 417 return resp, metadata, nil 418} 419 420// PodSandboxStatus returns the status of the PodSandbox. 421func (ds *dockerService) PodSandboxStatus(ctx context.Context, req *runtimeapi.PodSandboxStatusRequest) (*runtimeapi.PodSandboxStatusResponse, error) { 422 podSandboxID := req.PodSandboxId 423 424 r, metadata, err := ds.getPodSandboxDetails(podSandboxID) 425 if err != nil { 426 return nil, err 427 } 428 429 // Parse the timestamps. 430 createdAt, _, _, err := getContainerTimestamps(r) 431 if err != nil { 432 return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", podSandboxID, err) 433 } 434 ct := createdAt.UnixNano() 435 436 // Translate container to sandbox state. 437 state := runtimeapi.PodSandboxState_SANDBOX_NOTREADY 438 if r.State.Running { 439 state = runtimeapi.PodSandboxState_SANDBOX_READY 440 } 441 442 var ips []string 443 // TODO: Remove this when sandbox is available on windows 444 // This is a workaround for windows, where sandbox is not in use, and pod IP is determined through containers belonging to the Pod. 445 if ips = ds.determinePodIPBySandboxID(podSandboxID); len(ips) == 0 { 446 ips = ds.getIPs(podSandboxID, r) 447 } 448 449 // ip is primary ips 450 // ips is all other ips 451 ip := "" 452 if len(ips) != 0 { 453 ip = ips[0] 454 ips = ips[1:] 455 } 456 457 labels, annotations := extractLabels(r.Config.Labels) 458 status := &runtimeapi.PodSandboxStatus{ 459 Id: r.ID, 460 State: state, 461 CreatedAt: ct, 462 Metadata: metadata, 463 Labels: labels, 464 Annotations: annotations, 465 Network: &runtimeapi.PodSandboxNetworkStatus{ 466 Ip: ip, 467 }, 468 Linux: &runtimeapi.LinuxPodSandboxStatus{ 469 Namespaces: &runtimeapi.Namespace{ 470 Options: &runtimeapi.NamespaceOption{ 471 Network: networkNamespaceMode(r), 472 Pid: pidNamespaceMode(r), 473 Ipc: ipcNamespaceMode(r), 474 }, 475 }, 476 }, 477 } 478 // add additional IPs 479 additionalPodIPs := make([]*runtimeapi.PodIP, 0, len(ips)) 480 for _, ip := range ips { 481 additionalPodIPs = append(additionalPodIPs, &runtimeapi.PodIP{ 482 Ip: ip, 483 }) 484 } 485 status.Network.AdditionalIps = additionalPodIPs 486 return &runtimeapi.PodSandboxStatusResponse{Status: status}, nil 487} 488 489// ListPodSandbox returns a list of Sandbox. 490func (ds *dockerService) ListPodSandbox(_ context.Context, r *runtimeapi.ListPodSandboxRequest) (*runtimeapi.ListPodSandboxResponse, error) { 491 filter := r.GetFilter() 492 493 // By default, list all containers whether they are running or not. 494 opts := dockertypes.ContainerListOptions{All: true} 495 filterOutReadySandboxes := false 496 497 opts.Filters = dockerfilters.NewArgs() 498 f := newDockerFilter(&opts.Filters) 499 // Add filter to select only sandbox containers. 500 f.AddLabel(containerTypeLabelKey, containerTypeLabelSandbox) 501 502 if filter != nil { 503 if filter.Id != "" { 504 f.Add("id", filter.Id) 505 } 506 if filter.State != nil { 507 if filter.GetState().State == runtimeapi.PodSandboxState_SANDBOX_READY { 508 // Only list running containers. 509 opts.All = false 510 } else { 511 // runtimeapi.PodSandboxState_SANDBOX_NOTREADY can mean the 512 // container is in any of the non-running state (e.g., created, 513 // exited). We can't tell docker to filter out running 514 // containers directly, so we'll need to filter them out 515 // ourselves after getting the results. 516 filterOutReadySandboxes = true 517 } 518 } 519 520 if filter.LabelSelector != nil { 521 for k, v := range filter.LabelSelector { 522 f.AddLabel(k, v) 523 } 524 } 525 } 526 527 // Make sure we get the list of checkpoints first so that we don't include 528 // new PodSandboxes that are being created right now. 529 var err error 530 checkpoints := []string{} 531 if filter == nil { 532 checkpoints, err = ds.checkpointManager.ListCheckpoints() 533 if err != nil { 534 klog.ErrorS(err, "Failed to list checkpoints") 535 } 536 } 537 538 containers, err := ds.client.ListContainers(opts) 539 if err != nil { 540 return nil, err 541 } 542 543 // Convert docker containers to runtime api sandboxes. 544 result := []*runtimeapi.PodSandbox{} 545 // using map as set 546 sandboxIDs := make(map[string]bool) 547 for i := range containers { 548 c := containers[i] 549 converted, err := containerToRuntimeAPISandbox(&c) 550 if err != nil { 551 klog.V(4).InfoS("Unable to convert docker to runtime API sandbox", "containerName", c.Names, "err", err) 552 continue 553 } 554 if filterOutReadySandboxes && converted.State == runtimeapi.PodSandboxState_SANDBOX_READY { 555 continue 556 } 557 sandboxIDs[converted.Id] = true 558 result = append(result, converted) 559 } 560 561 // Include sandbox that could only be found with its checkpoint if no filter is applied 562 // These PodSandbox will only include PodSandboxID, Name, Namespace. 563 // These PodSandbox will be in PodSandboxState_SANDBOX_NOTREADY state. 564 for _, id := range checkpoints { 565 if _, ok := sandboxIDs[id]; ok { 566 continue 567 } 568 checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{}) 569 err := ds.checkpointManager.GetCheckpoint(id, checkpoint) 570 if err != nil { 571 klog.ErrorS(err, "Failed to retrieve checkpoint for sandbox", "sandboxID", id) 572 if err == errors.ErrCorruptCheckpoint { 573 err = ds.checkpointManager.RemoveCheckpoint(id) 574 if err != nil { 575 klog.ErrorS(err, "Failed to delete corrupt checkpoint for sandbox", "sandboxID", id) 576 } 577 } 578 continue 579 } 580 result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint)) 581 } 582 583 return &runtimeapi.ListPodSandboxResponse{Items: result}, nil 584} 585 586// applySandboxLinuxOptions applies LinuxPodSandboxConfig to dockercontainer.HostConfig and dockercontainer.ContainerCreateConfig. 587func (ds *dockerService) applySandboxLinuxOptions(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig, createConfig *dockertypes.ContainerCreateConfig, image string, separator rune) error { 588 if lc == nil { 589 return nil 590 } 591 // Apply security context. 592 if err := applySandboxSecurityContext(lc, createConfig.Config, hc, ds.network, separator); err != nil { 593 return err 594 } 595 596 // Set sysctls. 597 hc.Sysctls = lc.Sysctls 598 return nil 599} 600 601func (ds *dockerService) applySandboxResources(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig) error { 602 hc.Resources = dockercontainer.Resources{ 603 MemorySwap: DefaultMemorySwap(), 604 CPUShares: defaultSandboxCPUshares, 605 // Use docker's default cpu quota/period. 606 } 607 608 if lc != nil { 609 // Apply Cgroup options. 610 cgroupParent, err := ds.GenerateExpectedCgroupParent(lc.CgroupParent) 611 if err != nil { 612 return err 613 } 614 hc.CgroupParent = cgroupParent 615 } 616 return nil 617} 618 619// makeSandboxDockerConfig returns dockertypes.ContainerCreateConfig based on runtimeapi.PodSandboxConfig. 620func (ds *dockerService) makeSandboxDockerConfig(c *runtimeapi.PodSandboxConfig, image string) (*dockertypes.ContainerCreateConfig, error) { 621 // Merge annotations and labels because docker supports only labels. 622 labels := makeLabels(c.GetLabels(), c.GetAnnotations()) 623 // Apply a label to distinguish sandboxes from regular containers. 624 labels[containerTypeLabelKey] = containerTypeLabelSandbox 625 // Apply a container name label for infra container. This is used in summary v1. 626 // TODO(random-liu): Deprecate this label once container metrics is directly got from CRI. 627 labels[types.KubernetesContainerNameLabel] = sandboxContainerName 628 629 hc := &dockercontainer.HostConfig{ 630 IpcMode: dockercontainer.IpcMode("shareable"), 631 } 632 createConfig := &dockertypes.ContainerCreateConfig{ 633 Name: makeSandboxName(c), 634 Config: &dockercontainer.Config{ 635 Hostname: c.Hostname, 636 // TODO: Handle environment variables. 637 Image: image, 638 Labels: labels, 639 }, 640 HostConfig: hc, 641 } 642 643 // Apply linux-specific options. 644 if err := ds.applySandboxLinuxOptions(hc, c.GetLinux(), createConfig, image, securityOptSeparator); err != nil { 645 return nil, err 646 } 647 648 // Set port mappings. 649 exposedPorts, portBindings := makePortsAndBindings(c.GetPortMappings()) 650 createConfig.Config.ExposedPorts = exposedPorts 651 hc.PortBindings = portBindings 652 653 hc.OomScoreAdj = defaultSandboxOOMAdj 654 655 // Apply resource options. 656 if err := ds.applySandboxResources(hc, c.GetLinux()); err != nil { 657 return nil, err 658 } 659 660 // Set security options. 661 securityOpts := ds.getSandBoxSecurityOpts(securityOptSeparator) 662 hc.SecurityOpt = append(hc.SecurityOpt, securityOpts...) 663 664 return createConfig, nil 665} 666 667// networkNamespaceMode returns the network runtimeapi.NamespaceMode for this container. 668// Supports: POD, NODE 669func networkNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode { 670 if container != nil && container.HostConfig != nil && string(container.HostConfig.NetworkMode) == namespaceModeHost { 671 return runtimeapi.NamespaceMode_NODE 672 } 673 return runtimeapi.NamespaceMode_POD 674} 675 676// pidNamespaceMode returns the PID runtimeapi.NamespaceMode for this container. 677// Supports: CONTAINER, NODE 678// TODO(verb): add support for POD PID namespace sharing 679func pidNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode { 680 if container != nil && container.HostConfig != nil && string(container.HostConfig.PidMode) == namespaceModeHost { 681 return runtimeapi.NamespaceMode_NODE 682 } 683 return runtimeapi.NamespaceMode_CONTAINER 684} 685 686// ipcNamespaceMode returns the IPC runtimeapi.NamespaceMode for this container. 687// Supports: POD, NODE 688func ipcNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode { 689 if container != nil && container.HostConfig != nil && string(container.HostConfig.IpcMode) == namespaceModeHost { 690 return runtimeapi.NamespaceMode_NODE 691 } 692 return runtimeapi.NamespaceMode_POD 693} 694 695func constructPodSandboxCheckpoint(config *runtimeapi.PodSandboxConfig) checkpointmanager.Checkpoint { 696 data := CheckpointData{} 697 for _, pm := range config.GetPortMappings() { 698 proto := toCheckpointProtocol(pm.Protocol) 699 data.PortMappings = append(data.PortMappings, &PortMapping{ 700 HostPort: &pm.HostPort, 701 ContainerPort: &pm.ContainerPort, 702 Protocol: &proto, 703 HostIP: pm.HostIp, 704 }) 705 } 706 if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE { 707 data.HostNetwork = true 708 } 709 return NewPodSandboxCheckpoint(config.Metadata.Namespace, config.Metadata.Name, &data) 710} 711 712func toCheckpointProtocol(protocol runtimeapi.Protocol) Protocol { 713 switch protocol { 714 case runtimeapi.Protocol_TCP: 715 return protocolTCP 716 case runtimeapi.Protocol_UDP: 717 return protocolUDP 718 case runtimeapi.Protocol_SCTP: 719 return protocolSCTP 720 } 721 klog.InfoS("Unknown protocol, defaulting to TCP", "protocol", protocol) 722 return protocolTCP 723} 724 725// rewriteResolvFile rewrites resolv.conf file generated by docker. 726func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error { 727 if len(resolvFilePath) == 0 { 728 klog.ErrorS(nil, "ResolvConfPath is empty.") 729 return nil 730 } 731 732 if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) { 733 return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath) 734 } 735 736 var resolvFileContent []string 737 for _, srv := range dns { 738 resolvFileContent = append(resolvFileContent, "nameserver "+srv) 739 } 740 741 if len(dnsSearch) > 0 { 742 resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " ")) 743 } 744 745 if len(dnsOptions) > 0 { 746 resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " ")) 747 } 748 749 if len(resolvFileContent) > 0 { 750 resolvFileContentStr := strings.Join(resolvFileContent, "\n") 751 resolvFileContentStr += "\n" 752 753 klog.V(4).InfoS("Will attempt to re-write config file", "path", resolvFilePath, "fileContent", resolvFileContent) 754 if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil { 755 klog.ErrorS(err, "Resolv.conf could not be updated") 756 return err 757 } 758 } 759 760 return nil 761} 762 763func rewriteFile(filePath, stringToWrite string) error { 764 f, err := os.OpenFile(filePath, os.O_TRUNC|os.O_WRONLY, 0644) 765 if err != nil { 766 return err 767 } 768 defer f.Close() 769 770 _, err = f.WriteString(stringToWrite) 771 return err 772} 773