1// +build !dockerless
2
3/*
4Copyright 2016 The Kubernetes Authors.
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10    http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17*/
18
19package dockershim
20
21import (
22	"context"
23	"encoding/json"
24	"fmt"
25	"os"
26	"strings"
27	"time"
28
29	dockertypes "github.com/docker/docker/api/types"
30	dockercontainer "github.com/docker/docker/api/types/container"
31	dockerfilters "github.com/docker/docker/api/types/filters"
32	utilerrors "k8s.io/apimachinery/pkg/util/errors"
33	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
34	"k8s.io/klog/v2"
35	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
36	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
37	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
38	"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
39	"k8s.io/kubernetes/pkg/kubelet/types"
40)
41
const (
	// defaultSandboxImage is the sandbox (pause) image used when the
	// dockerService has no sandbox image configured.
	defaultSandboxImage = "k8s.gcr.io/pause:3.5"

	// defaultSandboxCPUshares is the default CPU shares value applied to
	// the sandbox container's resources.
	defaultSandboxCPUshares = int64(2)

	// defaultSandboxOOMAdj is the OOM score adjustment applied to the docker
	// sandbox container. With this value an OOM kill of the sandbox is very
	// unlikely, but not impossible. -998 is chosen so that the sandbox is
	// still OOM killed before other more vital processes such as the docker
	// daemon or the kubelet.
	defaultSandboxOOMAdj = int(-998)

	// runtimeName is the name of the underlying container runtime.
	runtimeName = "docker"
)
58
// defaultSandboxGracePeriod is the termination grace period used when
// stopping a sandbox container.
var defaultSandboxGracePeriod = 10 * time.Second
63
64// Returns whether the sandbox network is ready, and whether the sandbox is known
65func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
66	ds.networkReadyLock.Lock()
67	defer ds.networkReadyLock.Unlock()
68	ready, ok := ds.networkReady[podSandboxID]
69	return ready, ok
70}
71
72func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
73	ds.networkReadyLock.Lock()
74	defer ds.networkReadyLock.Unlock()
75	ds.networkReady[podSandboxID] = ready
76}
77
78func (ds *dockerService) clearNetworkReady(podSandboxID string) {
79	ds.networkReadyLock.Lock()
80	defer ds.networkReadyLock.Unlock()
81	delete(ds.networkReady, podSandboxID)
82}
83
84// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
85// the sandbox is in ready state.
86// For docker, PodSandbox is implemented by a container holding the network
87// namespace for the pod.
88// Note: docker doesn't use LogDirectory (yet).
89func (ds *dockerService) RunPodSandbox(ctx context.Context, r *runtimeapi.RunPodSandboxRequest) (*runtimeapi.RunPodSandboxResponse, error) {
90	config := r.GetConfig()
91
92	// Step 1: Pull the image for the sandbox.
93	image := defaultSandboxImage
94	podSandboxImage := ds.podSandboxImage
95	if len(podSandboxImage) != 0 {
96		image = podSandboxImage
97	}
98
99	// NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly.
100	// see: https://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-registry
101	// Only pull sandbox image when it's not present - v1.PullIfNotPresent.
102	if err := ensureSandboxImageExists(ds.client, image); err != nil {
103		return nil, err
104	}
105
106	// Step 2: Create the sandbox container.
107	if r.GetRuntimeHandler() != "" && r.GetRuntimeHandler() != runtimeName {
108		return nil, fmt.Errorf("RuntimeHandler %q not supported", r.GetRuntimeHandler())
109	}
110	createConfig, err := ds.makeSandboxDockerConfig(config, image)
111	if err != nil {
112		return nil, fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
113	}
114	createResp, err := ds.client.CreateContainer(*createConfig)
115	if err != nil {
116		createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
117	}
118
119	if err != nil || createResp == nil {
120		return nil, fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
121	}
122	resp := &runtimeapi.RunPodSandboxResponse{PodSandboxId: createResp.ID}
123
124	ds.setNetworkReady(createResp.ID, false)
125	defer func(e *error) {
126		// Set networking ready depending on the error return of
127		// the parent function
128		if *e == nil {
129			ds.setNetworkReady(createResp.ID, true)
130		}
131	}(&err)
132
133	// Step 3: Create Sandbox Checkpoint.
134	if err = ds.checkpointManager.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
135		return nil, err
136	}
137
138	// Step 4: Start the sandbox container.
139	// Assume kubelet's garbage collector would remove the sandbox later, if
140	// startContainer failed.
141	err = ds.client.StartContainer(createResp.ID)
142	if err != nil {
143		return nil, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err)
144	}
145
146	// Rewrite resolv.conf file generated by docker.
147	// NOTE: cluster dns settings aren't passed anymore to docker api in all cases,
148	// not only for pods with host network: the resolver conf will be overwritten
149	// after sandbox creation to override docker's behaviour. This resolv.conf
150	// file is shared by all containers of the same pod, and needs to be modified
151	// only once per pod.
152	if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
153		containerInfo, err := ds.client.InspectContainer(createResp.ID)
154		if err != nil {
155			return nil, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err)
156		}
157
158		if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil {
159			return nil, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err)
160		}
161	}
162
163	// Do not invoke network plugins if in hostNetwork mode.
164	if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE {
165		return resp, nil
166	}
167
168	// Step 5: Setup networking for the sandbox.
169	// All pod networking is setup by a CNI plugin discovered at startup time.
170	// This plugin assigns the pod ip, sets up routes inside the sandbox,
171	// creates interfaces etc. In theory, its jurisdiction ends with pod
172	// sandbox networking, but it might insert iptables rules or open ports
173	// on the host as well, to satisfy parts of the pod spec that aren't
174	// recognized by the CNI standard yet.
175	cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
176	networkOptions := make(map[string]string)
177	if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
178		// Build DNS options.
179		dnsOption, err := json.Marshal(dnsConfig)
180		if err != nil {
181			return nil, fmt.Errorf("failed to marshal dns config for pod %q: %v", config.Metadata.Name, err)
182		}
183		networkOptions["dns"] = string(dnsOption)
184	}
185	err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations, networkOptions)
186	if err != nil {
187		errList := []error{fmt.Errorf("failed to set up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)}
188
189		// Ensure network resources are cleaned up even if the plugin
190		// succeeded but an error happened between that success and here.
191		err = ds.network.TearDownPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID)
192		if err != nil {
193			errList = append(errList, fmt.Errorf("failed to clean up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err))
194		}
195
196		err = ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod)
197		if err != nil {
198			errList = append(errList, fmt.Errorf("failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err))
199		}
200
201		return resp, utilerrors.NewAggregate(errList)
202	}
203
204	return resp, nil
205}
206
207// StopPodSandbox stops the sandbox. If there are any running containers in the
208// sandbox, they should be force terminated.
209// TODO: This function blocks sandbox teardown on networking teardown. Is it
210// better to cut our losses assuming an out of band GC routine will cleanup
211// after us?
212func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopPodSandboxRequest) (*runtimeapi.StopPodSandboxResponse, error) {
213	var namespace, name string
214	var hostNetwork bool
215
216	podSandboxID := r.PodSandboxId
217	resp := &runtimeapi.StopPodSandboxResponse{}
218
219	// Try to retrieve minimal sandbox information from docker daemon or sandbox checkpoint.
220	inspectResult, metadata, statusErr := ds.getPodSandboxDetails(podSandboxID)
221	if statusErr == nil {
222		namespace = metadata.Namespace
223		name = metadata.Name
224		hostNetwork = (networkNamespaceMode(inspectResult) == runtimeapi.NamespaceMode_NODE)
225	} else {
226		checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
227		checkpointErr := ds.checkpointManager.GetCheckpoint(podSandboxID, checkpoint)
228
229		// Proceed if both sandbox container and checkpoint could not be found. This means that following
230		// actions will only have sandbox ID and not have pod namespace and name information.
231		// Return error if encounter any unexpected error.
232		if checkpointErr != nil {
233			if checkpointErr != errors.ErrCheckpointNotFound {
234				err := ds.checkpointManager.RemoveCheckpoint(podSandboxID)
235				if err != nil {
236					klog.ErrorS(err, "Failed to delete corrupt checkpoint for sandbox", "podSandboxID", podSandboxID)
237				}
238			}
239			if libdocker.IsContainerNotFoundError(statusErr) {
240				klog.InfoS("Both sandbox container and checkpoint could not be found. Proceed without further sandbox information.", "podSandboxID", podSandboxID)
241			} else {
242				return nil, utilerrors.NewAggregate([]error{
243					fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr),
244					fmt.Errorf("failed to get sandbox status: %v", statusErr)})
245			}
246		} else {
247			_, name, namespace, _, hostNetwork = checkpoint.GetData()
248		}
249	}
250
251	// WARNING: The following operations made the following assumption:
252	// 1. kubelet will retry on any error returned by StopPodSandbox.
253	// 2. tearing down network and stopping sandbox container can succeed in any sequence.
254	// This depends on the implementation detail of network plugin and proper error handling.
255	// For kubenet, if tearing down network failed and sandbox container is stopped, kubelet
256	// will retry. On retry, kubenet will not be able to retrieve network namespace of the sandbox
257	// since it is stopped. With empty network namespace, CNI bridge plugin will conduct best
258	// effort clean up and will not return error.
259	errList := []error{}
260	ready, ok := ds.getNetworkReady(podSandboxID)
261	if !hostNetwork && (ready || !ok) {
262		// Only tear down the pod network if we haven't done so already
263		cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
264		err := ds.network.TearDownPod(namespace, name, cID)
265		if err == nil {
266			ds.setNetworkReady(podSandboxID, false)
267		} else {
268			errList = append(errList, err)
269		}
270	}
271	if err := ds.client.StopContainer(podSandboxID, defaultSandboxGracePeriod); err != nil {
272		// Do not return error if the container does not exist
273		if !libdocker.IsContainerNotFoundError(err) {
274			klog.ErrorS(err, "Failed to stop sandbox", "podSandboxID", podSandboxID)
275			errList = append(errList, err)
276		} else {
277			// remove the checkpoint for any sandbox that is not found in the runtime
278			ds.checkpointManager.RemoveCheckpoint(podSandboxID)
279		}
280	}
281
282	if len(errList) == 0 {
283		return resp, nil
284	}
285
286	// TODO: Stop all running containers in the sandbox.
287	return nil, utilerrors.NewAggregate(errList)
288}
289
290// RemovePodSandbox removes the sandbox. If there are running containers in the
291// sandbox, they should be forcibly removed.
292func (ds *dockerService) RemovePodSandbox(ctx context.Context, r *runtimeapi.RemovePodSandboxRequest) (*runtimeapi.RemovePodSandboxResponse, error) {
293	podSandboxID := r.PodSandboxId
294	var errs []error
295
296	opts := dockertypes.ContainerListOptions{All: true}
297
298	opts.Filters = dockerfilters.NewArgs()
299	f := newDockerFilter(&opts.Filters)
300	f.AddLabel(sandboxIDLabelKey, podSandboxID)
301
302	containers, err := ds.client.ListContainers(opts)
303	if err != nil {
304		errs = append(errs, err)
305	}
306
307	// Remove all containers in the sandbox.
308	for i := range containers {
309		if _, err := ds.RemoveContainer(ctx, &runtimeapi.RemoveContainerRequest{ContainerId: containers[i].ID}); err != nil && !libdocker.IsContainerNotFoundError(err) {
310			errs = append(errs, err)
311		}
312	}
313
314	// Remove the sandbox container.
315	err = ds.client.RemoveContainer(podSandboxID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true, Force: true})
316	if err == nil || libdocker.IsContainerNotFoundError(err) {
317		// Only clear network ready when the sandbox has actually been
318		// removed from docker or doesn't exist
319		ds.clearNetworkReady(podSandboxID)
320	} else {
321		errs = append(errs, err)
322	}
323
324	// Remove the checkpoint of the sandbox.
325	if err := ds.checkpointManager.RemoveCheckpoint(podSandboxID); err != nil {
326		errs = append(errs, err)
327	}
328	if len(errs) == 0 {
329		return &runtimeapi.RemovePodSandboxResponse{}, nil
330	}
331	return nil, utilerrors.NewAggregate(errs)
332}
333
334// getIPsFromPlugin interrogates the network plugin for sandbox IPs.
335func (ds *dockerService) getIPsFromPlugin(sandbox *dockertypes.ContainerJSON) ([]string, error) {
336	metadata, err := parseSandboxName(sandbox.Name)
337	if err != nil {
338		return nil, err
339	}
340	msg := fmt.Sprintf("Couldn't find network status for %s/%s through plugin", metadata.Namespace, metadata.Name)
341	cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
342	networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
343	if err != nil {
344		return nil, err
345	}
346	if networkStatus == nil {
347		return nil, fmt.Errorf("%v: invalid network status for", msg)
348	}
349
350	ips := make([]string, 0)
351	for _, ip := range networkStatus.IPs {
352		ips = append(ips, ip.String())
353	}
354	// if we don't have any ip in our list then cni is using classic primary IP only
355	if len(ips) == 0 {
356		ips = append(ips, networkStatus.IP.String())
357	}
358	return ips, nil
359}
360
361// getIPs returns the ip given the output of `docker inspect` on a pod sandbox,
362// first interrogating any registered plugins, then simply trusting the ip
363// in the sandbox itself. We look for an ipv4 address before ipv6.
364func (ds *dockerService) getIPs(podSandboxID string, sandbox *dockertypes.ContainerJSON) []string {
365	if sandbox.NetworkSettings == nil {
366		return nil
367	}
368	if networkNamespaceMode(sandbox) == runtimeapi.NamespaceMode_NODE {
369		// For sandboxes using host network, the shim is not responsible for
370		// reporting the IP.
371		return nil
372	}
373
374	// Don't bother getting IP if the pod is known and networking isn't ready
375	ready, ok := ds.getNetworkReady(podSandboxID)
376	if ok && !ready {
377		return nil
378	}
379
380	ips, err := ds.getIPsFromPlugin(sandbox)
381	if err == nil {
382		return ips
383	}
384
385	ips = make([]string, 0)
386	// TODO: trusting the docker ip is not a great idea. However docker uses
387	// eth0 by default and so does CNI, so if we find a docker IP here, we
388	// conclude that the plugin must have failed setup, or forgotten its ip.
389	// This is not a sensible assumption for plugins across the board, but if
390	// a plugin doesn't want this behavior, it can throw an error.
391	if sandbox.NetworkSettings.IPAddress != "" {
392		ips = append(ips, sandbox.NetworkSettings.IPAddress)
393	}
394	if sandbox.NetworkSettings.GlobalIPv6Address != "" {
395		ips = append(ips, sandbox.NetworkSettings.GlobalIPv6Address)
396	}
397
398	// If all else fails, warn but don't return an error, as pod status
399	// should generally not return anything except fatal errors
400	// FIXME: handle network errors by restarting the pod somehow?
401	klog.InfoS("Failed to read pod IP from plugin/docker", "err", err)
402	return ips
403}
404
405// Returns the inspect container response, the sandbox metadata, and network namespace mode
406func (ds *dockerService) getPodSandboxDetails(podSandboxID string) (*dockertypes.ContainerJSON, *runtimeapi.PodSandboxMetadata, error) {
407	resp, err := ds.client.InspectContainer(podSandboxID)
408	if err != nil {
409		return nil, nil, err
410	}
411
412	metadata, err := parseSandboxName(resp.Name)
413	if err != nil {
414		return nil, nil, err
415	}
416
417	return resp, metadata, nil
418}
419
420// PodSandboxStatus returns the status of the PodSandbox.
421func (ds *dockerService) PodSandboxStatus(ctx context.Context, req *runtimeapi.PodSandboxStatusRequest) (*runtimeapi.PodSandboxStatusResponse, error) {
422	podSandboxID := req.PodSandboxId
423
424	r, metadata, err := ds.getPodSandboxDetails(podSandboxID)
425	if err != nil {
426		return nil, err
427	}
428
429	// Parse the timestamps.
430	createdAt, _, _, err := getContainerTimestamps(r)
431	if err != nil {
432		return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", podSandboxID, err)
433	}
434	ct := createdAt.UnixNano()
435
436	// Translate container to sandbox state.
437	state := runtimeapi.PodSandboxState_SANDBOX_NOTREADY
438	if r.State.Running {
439		state = runtimeapi.PodSandboxState_SANDBOX_READY
440	}
441
442	var ips []string
443	// TODO: Remove this when sandbox is available on windows
444	// This is a workaround for windows, where sandbox is not in use, and pod IP is determined through containers belonging to the Pod.
445	if ips = ds.determinePodIPBySandboxID(podSandboxID); len(ips) == 0 {
446		ips = ds.getIPs(podSandboxID, r)
447	}
448
449	// ip is primary ips
450	// ips is all other ips
451	ip := ""
452	if len(ips) != 0 {
453		ip = ips[0]
454		ips = ips[1:]
455	}
456
457	labels, annotations := extractLabels(r.Config.Labels)
458	status := &runtimeapi.PodSandboxStatus{
459		Id:          r.ID,
460		State:       state,
461		CreatedAt:   ct,
462		Metadata:    metadata,
463		Labels:      labels,
464		Annotations: annotations,
465		Network: &runtimeapi.PodSandboxNetworkStatus{
466			Ip: ip,
467		},
468		Linux: &runtimeapi.LinuxPodSandboxStatus{
469			Namespaces: &runtimeapi.Namespace{
470				Options: &runtimeapi.NamespaceOption{
471					Network: networkNamespaceMode(r),
472					Pid:     pidNamespaceMode(r),
473					Ipc:     ipcNamespaceMode(r),
474				},
475			},
476		},
477	}
478	// add additional IPs
479	additionalPodIPs := make([]*runtimeapi.PodIP, 0, len(ips))
480	for _, ip := range ips {
481		additionalPodIPs = append(additionalPodIPs, &runtimeapi.PodIP{
482			Ip: ip,
483		})
484	}
485	status.Network.AdditionalIps = additionalPodIPs
486	return &runtimeapi.PodSandboxStatusResponse{Status: status}, nil
487}
488
489// ListPodSandbox returns a list of Sandbox.
490func (ds *dockerService) ListPodSandbox(_ context.Context, r *runtimeapi.ListPodSandboxRequest) (*runtimeapi.ListPodSandboxResponse, error) {
491	filter := r.GetFilter()
492
493	// By default, list all containers whether they are running or not.
494	opts := dockertypes.ContainerListOptions{All: true}
495	filterOutReadySandboxes := false
496
497	opts.Filters = dockerfilters.NewArgs()
498	f := newDockerFilter(&opts.Filters)
499	// Add filter to select only sandbox containers.
500	f.AddLabel(containerTypeLabelKey, containerTypeLabelSandbox)
501
502	if filter != nil {
503		if filter.Id != "" {
504			f.Add("id", filter.Id)
505		}
506		if filter.State != nil {
507			if filter.GetState().State == runtimeapi.PodSandboxState_SANDBOX_READY {
508				// Only list running containers.
509				opts.All = false
510			} else {
511				// runtimeapi.PodSandboxState_SANDBOX_NOTREADY can mean the
512				// container is in any of the non-running state (e.g., created,
513				// exited). We can't tell docker to filter out running
514				// containers directly, so we'll need to filter them out
515				// ourselves after getting the results.
516				filterOutReadySandboxes = true
517			}
518		}
519
520		if filter.LabelSelector != nil {
521			for k, v := range filter.LabelSelector {
522				f.AddLabel(k, v)
523			}
524		}
525	}
526
527	// Make sure we get the list of checkpoints first so that we don't include
528	// new PodSandboxes that are being created right now.
529	var err error
530	checkpoints := []string{}
531	if filter == nil {
532		checkpoints, err = ds.checkpointManager.ListCheckpoints()
533		if err != nil {
534			klog.ErrorS(err, "Failed to list checkpoints")
535		}
536	}
537
538	containers, err := ds.client.ListContainers(opts)
539	if err != nil {
540		return nil, err
541	}
542
543	// Convert docker containers to runtime api sandboxes.
544	result := []*runtimeapi.PodSandbox{}
545	// using map as set
546	sandboxIDs := make(map[string]bool)
547	for i := range containers {
548		c := containers[i]
549		converted, err := containerToRuntimeAPISandbox(&c)
550		if err != nil {
551			klog.V(4).InfoS("Unable to convert docker to runtime API sandbox", "containerName", c.Names, "err", err)
552			continue
553		}
554		if filterOutReadySandboxes && converted.State == runtimeapi.PodSandboxState_SANDBOX_READY {
555			continue
556		}
557		sandboxIDs[converted.Id] = true
558		result = append(result, converted)
559	}
560
561	// Include sandbox that could only be found with its checkpoint if no filter is applied
562	// These PodSandbox will only include PodSandboxID, Name, Namespace.
563	// These PodSandbox will be in PodSandboxState_SANDBOX_NOTREADY state.
564	for _, id := range checkpoints {
565		if _, ok := sandboxIDs[id]; ok {
566			continue
567		}
568		checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
569		err := ds.checkpointManager.GetCheckpoint(id, checkpoint)
570		if err != nil {
571			klog.ErrorS(err, "Failed to retrieve checkpoint for sandbox", "sandboxID", id)
572			if err == errors.ErrCorruptCheckpoint {
573				err = ds.checkpointManager.RemoveCheckpoint(id)
574				if err != nil {
575					klog.ErrorS(err, "Failed to delete corrupt checkpoint for sandbox", "sandboxID", id)
576				}
577			}
578			continue
579		}
580		result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
581	}
582
583	return &runtimeapi.ListPodSandboxResponse{Items: result}, nil
584}
585
586// applySandboxLinuxOptions applies LinuxPodSandboxConfig to dockercontainer.HostConfig and dockercontainer.ContainerCreateConfig.
587func (ds *dockerService) applySandboxLinuxOptions(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig, createConfig *dockertypes.ContainerCreateConfig, image string, separator rune) error {
588	if lc == nil {
589		return nil
590	}
591	// Apply security context.
592	if err := applySandboxSecurityContext(lc, createConfig.Config, hc, ds.network, separator); err != nil {
593		return err
594	}
595
596	// Set sysctls.
597	hc.Sysctls = lc.Sysctls
598	return nil
599}
600
601func (ds *dockerService) applySandboxResources(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig) error {
602	hc.Resources = dockercontainer.Resources{
603		MemorySwap: DefaultMemorySwap(),
604		CPUShares:  defaultSandboxCPUshares,
605		// Use docker's default cpu quota/period.
606	}
607
608	if lc != nil {
609		// Apply Cgroup options.
610		cgroupParent, err := ds.GenerateExpectedCgroupParent(lc.CgroupParent)
611		if err != nil {
612			return err
613		}
614		hc.CgroupParent = cgroupParent
615	}
616	return nil
617}
618
619// makeSandboxDockerConfig returns dockertypes.ContainerCreateConfig based on runtimeapi.PodSandboxConfig.
620func (ds *dockerService) makeSandboxDockerConfig(c *runtimeapi.PodSandboxConfig, image string) (*dockertypes.ContainerCreateConfig, error) {
621	// Merge annotations and labels because docker supports only labels.
622	labels := makeLabels(c.GetLabels(), c.GetAnnotations())
623	// Apply a label to distinguish sandboxes from regular containers.
624	labels[containerTypeLabelKey] = containerTypeLabelSandbox
625	// Apply a container name label for infra container. This is used in summary v1.
626	// TODO(random-liu): Deprecate this label once container metrics is directly got from CRI.
627	labels[types.KubernetesContainerNameLabel] = sandboxContainerName
628
629	hc := &dockercontainer.HostConfig{
630		IpcMode: dockercontainer.IpcMode("shareable"),
631	}
632	createConfig := &dockertypes.ContainerCreateConfig{
633		Name: makeSandboxName(c),
634		Config: &dockercontainer.Config{
635			Hostname: c.Hostname,
636			// TODO: Handle environment variables.
637			Image:  image,
638			Labels: labels,
639		},
640		HostConfig: hc,
641	}
642
643	// Apply linux-specific options.
644	if err := ds.applySandboxLinuxOptions(hc, c.GetLinux(), createConfig, image, securityOptSeparator); err != nil {
645		return nil, err
646	}
647
648	// Set port mappings.
649	exposedPorts, portBindings := makePortsAndBindings(c.GetPortMappings())
650	createConfig.Config.ExposedPorts = exposedPorts
651	hc.PortBindings = portBindings
652
653	hc.OomScoreAdj = defaultSandboxOOMAdj
654
655	// Apply resource options.
656	if err := ds.applySandboxResources(hc, c.GetLinux()); err != nil {
657		return nil, err
658	}
659
660	// Set security options.
661	securityOpts := ds.getSandBoxSecurityOpts(securityOptSeparator)
662	hc.SecurityOpt = append(hc.SecurityOpt, securityOpts...)
663
664	return createConfig, nil
665}
666
667// networkNamespaceMode returns the network runtimeapi.NamespaceMode for this container.
668// Supports: POD, NODE
669func networkNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
670	if container != nil && container.HostConfig != nil && string(container.HostConfig.NetworkMode) == namespaceModeHost {
671		return runtimeapi.NamespaceMode_NODE
672	}
673	return runtimeapi.NamespaceMode_POD
674}
675
676// pidNamespaceMode returns the PID runtimeapi.NamespaceMode for this container.
677// Supports: CONTAINER, NODE
678// TODO(verb): add support for POD PID namespace sharing
679func pidNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
680	if container != nil && container.HostConfig != nil && string(container.HostConfig.PidMode) == namespaceModeHost {
681		return runtimeapi.NamespaceMode_NODE
682	}
683	return runtimeapi.NamespaceMode_CONTAINER
684}
685
686// ipcNamespaceMode returns the IPC runtimeapi.NamespaceMode for this container.
687// Supports: POD, NODE
688func ipcNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
689	if container != nil && container.HostConfig != nil && string(container.HostConfig.IpcMode) == namespaceModeHost {
690		return runtimeapi.NamespaceMode_NODE
691	}
692	return runtimeapi.NamespaceMode_POD
693}
694
695func constructPodSandboxCheckpoint(config *runtimeapi.PodSandboxConfig) checkpointmanager.Checkpoint {
696	data := CheckpointData{}
697	for _, pm := range config.GetPortMappings() {
698		proto := toCheckpointProtocol(pm.Protocol)
699		data.PortMappings = append(data.PortMappings, &PortMapping{
700			HostPort:      &pm.HostPort,
701			ContainerPort: &pm.ContainerPort,
702			Protocol:      &proto,
703			HostIP:        pm.HostIp,
704		})
705	}
706	if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE {
707		data.HostNetwork = true
708	}
709	return NewPodSandboxCheckpoint(config.Metadata.Namespace, config.Metadata.Name, &data)
710}
711
712func toCheckpointProtocol(protocol runtimeapi.Protocol) Protocol {
713	switch protocol {
714	case runtimeapi.Protocol_TCP:
715		return protocolTCP
716	case runtimeapi.Protocol_UDP:
717		return protocolUDP
718	case runtimeapi.Protocol_SCTP:
719		return protocolSCTP
720	}
721	klog.InfoS("Unknown protocol, defaulting to TCP", "protocol", protocol)
722	return protocolTCP
723}
724
725// rewriteResolvFile rewrites resolv.conf file generated by docker.
726func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error {
727	if len(resolvFilePath) == 0 {
728		klog.ErrorS(nil, "ResolvConfPath is empty.")
729		return nil
730	}
731
732	if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
733		return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
734	}
735
736	var resolvFileContent []string
737	for _, srv := range dns {
738		resolvFileContent = append(resolvFileContent, "nameserver "+srv)
739	}
740
741	if len(dnsSearch) > 0 {
742		resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " "))
743	}
744
745	if len(dnsOptions) > 0 {
746		resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " "))
747	}
748
749	if len(resolvFileContent) > 0 {
750		resolvFileContentStr := strings.Join(resolvFileContent, "\n")
751		resolvFileContentStr += "\n"
752
753		klog.V(4).InfoS("Will attempt to re-write config file", "path", resolvFilePath, "fileContent", resolvFileContent)
754		if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil {
755			klog.ErrorS(err, "Resolv.conf could not be updated")
756			return err
757		}
758	}
759
760	return nil
761}
762
// rewriteFile truncates the existing file at filePath and writes
// stringToWrite into it. The file is not created when it does not exist; in
// that case the open error is returned.
//
// The error from Close is returned when the write itself succeeded, so that
// a failure reported only at close time is not silently dropped.
func rewriteFile(filePath, stringToWrite string) error {
	f, err := os.OpenFile(filePath, os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}

	if _, err := f.WriteString(stringToWrite); err != nil {
		// The write error is the one worth reporting; closing is best effort.
		_ = f.Close()
		return err
	}
	return f.Close()
}
773