1/*
2Copyright 2015 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package container
18
19import (
20	"encoding/json"
21	"fmt"
22	"hash/fnv"
23	"strings"
24
25	"k8s.io/klog/v2"
26
27	v1 "k8s.io/api/core/v1"
28	"k8s.io/apimachinery/pkg/runtime"
29	"k8s.io/apimachinery/pkg/types"
30	"k8s.io/apimachinery/pkg/util/sets"
31	"k8s.io/client-go/tools/record"
32	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
33	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
34	sc "k8s.io/kubernetes/pkg/securitycontext"
35	hashutil "k8s.io/kubernetes/pkg/util/hash"
36	"k8s.io/kubernetes/third_party/forked/golang/expansion"
37	utilsnet "k8s.io/utils/net"
38)
39
// HandlerRunner runs a lifecycle handler for a container.
type HandlerRunner interface {
	// Run executes handler for the given container of the given pod. It
	// returns a string and an error.
	// NOTE(review): the returned string is presumably the handler's
	// output/message — confirm against the implementations.
	Run(containerID ContainerID, pod *v1.Pod, container *v1.Container, handler *v1.Handler) (string, error)
}
44
// RuntimeHelper wraps kubelet to make container runtime
// able to get necessary information like the RunContainerOptions, DNS settings, Host IP.
type RuntimeHelper interface {
	// GenerateRunContainerOptions returns the RunContainerOptions for the given
	// container of the pod, together with a cleanup callback and an error.
	// NOTE(review): when cleanupAction has to be invoked is not visible from
	// this declaration — confirm with the implementation.
	GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (contOpts *RunContainerOptions, cleanupAction func(), err error)
	// GetPodDNS returns the DNS configuration for the pod.
	GetPodDNS(pod *v1.Pod) (dnsConfig *runtimeapi.DNSConfig, err error)
	// GetPodCgroupParent returns the CgroupName identifier, and its literal cgroupfs form on the host
	// of a pod.
	// NOTE(review): the signature returns a single string, so the wording
	// above may be stale — confirm which form is actually returned.
	GetPodCgroupParent(pod *v1.Pod) string
	// GetPodDir returns a directory path for the pod with the given UID.
	// NOTE(review): presumably the pod's data directory on the host — confirm.
	GetPodDir(podUID types.UID) string
	// GeneratePodHostNameAndDomain returns the hostname and host domain for the pod.
	GeneratePodHostNameAndDomain(pod *v1.Pod) (hostname string, hostDomain string, err error)
	// GetExtraSupplementalGroupsForPod returns a list of the extra
	// supplemental groups for the Pod. These extra supplemental groups come
	// from annotations on persistent volumes that the pod depends on.
	GetExtraSupplementalGroupsForPod(pod *v1.Pod) []int64
}
60
61// ShouldContainerBeRestarted checks whether a container needs to be restarted.
62// TODO(yifan): Think about how to refactor this.
63func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
64	// Once a pod has been marked deleted, it should not be restarted
65	if pod.DeletionTimestamp != nil {
66		return false
67	}
68	// Get latest container status.
69	status := podStatus.FindContainerStatusByName(container.Name)
70	// If the container was never started before, we should start it.
71	// NOTE(random-liu): If all historical containers were GC'd, we'll also return true here.
72	if status == nil {
73		return true
74	}
75	// Check whether container is running
76	if status.State == ContainerStateRunning {
77		return false
78	}
79	// Always restart container in the unknown, or in the created state.
80	if status.State == ContainerStateUnknown || status.State == ContainerStateCreated {
81		return true
82	}
83	// Check RestartPolicy for dead container
84	if pod.Spec.RestartPolicy == v1.RestartPolicyNever {
85		klog.V(4).InfoS("Already ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
86		return false
87	}
88	if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure {
89		// Check the exit code.
90		if status.ExitCode == 0 {
91			klog.V(4).InfoS("Already successfully ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
92			return false
93		}
94	}
95	return true
96}
97
98// HashContainer returns the hash of the container. It is used to compare
99// the running container with its desired spec.
100// Note: remember to update hashValues in container_hash_test.go as well.
101func HashContainer(container *v1.Container) uint64 {
102	hash := fnv.New32a()
103	// Omit nil or empty field when calculating hash value
104	// Please see https://github.com/kubernetes/kubernetes/issues/53644
105	containerJSON, _ := json.Marshal(container)
106	hashutil.DeepHashObject(hash, containerJSON)
107	return uint64(hash.Sum32())
108}
109
110// envVarsToMap constructs a map of environment name to value from a slice
111// of env vars.
112func envVarsToMap(envs []EnvVar) map[string]string {
113	result := map[string]string{}
114	for _, env := range envs {
115		result[env.Name] = env.Value
116	}
117	return result
118}
119
120// v1EnvVarsToMap constructs a map of environment name to value from a slice
121// of env vars.
122func v1EnvVarsToMap(envs []v1.EnvVar) map[string]string {
123	result := map[string]string{}
124	for _, env := range envs {
125		result[env.Name] = env.Value
126	}
127
128	return result
129}
130
131// ExpandContainerCommandOnlyStatic substitutes only static environment variable values from the
132// container environment definitions. This does *not* include valueFrom substitutions.
133// TODO: callers should use ExpandContainerCommandAndArgs with a fully resolved list of environment.
134func ExpandContainerCommandOnlyStatic(containerCommand []string, envs []v1.EnvVar) (command []string) {
135	mapping := expansion.MappingFuncFor(v1EnvVarsToMap(envs))
136	if len(containerCommand) != 0 {
137		for _, cmd := range containerCommand {
138			command = append(command, expansion.Expand(cmd, mapping))
139		}
140	}
141	return command
142}
143
144// ExpandContainerVolumeMounts expands the subpath of the given VolumeMount by replacing variable references with the values of given EnvVar.
145func ExpandContainerVolumeMounts(mount v1.VolumeMount, envs []EnvVar) (string, error) {
146
147	envmap := envVarsToMap(envs)
148	missingKeys := sets.NewString()
149	expanded := expansion.Expand(mount.SubPathExpr, func(key string) string {
150		value, ok := envmap[key]
151		if !ok || len(value) == 0 {
152			missingKeys.Insert(key)
153		}
154		return value
155	})
156
157	if len(missingKeys) > 0 {
158		return "", fmt.Errorf("missing value for %s", strings.Join(missingKeys.List(), ", "))
159	}
160	return expanded, nil
161}
162
163// ExpandContainerCommandAndArgs expands the given Container's command by replacing variable references `with the values of given EnvVar.
164func ExpandContainerCommandAndArgs(container *v1.Container, envs []EnvVar) (command []string, args []string) {
165	mapping := expansion.MappingFuncFor(envVarsToMap(envs))
166
167	if len(container.Command) != 0 {
168		for _, cmd := range container.Command {
169			command = append(command, expansion.Expand(cmd, mapping))
170		}
171	}
172
173	if len(container.Args) != 0 {
174		for _, arg := range container.Args {
175			args = append(args, expansion.Expand(arg, mapping))
176		}
177	}
178
179	return command, args
180}
181
182// FilterEventRecorder creates an event recorder to record object's event except implicitly required container's, like infra container.
183func FilterEventRecorder(recorder record.EventRecorder) record.EventRecorder {
184	return &innerEventRecorder{
185		recorder: recorder,
186	}
187}
188
// innerEventRecorder is a record.EventRecorder wrapper whose methods forward
// an event to the wrapped recorder only when shouldRecordEvent accepts the
// event's object.
type innerEventRecorder struct {
	// recorder is the wrapped recorder that accepted events are forwarded to.
	recorder record.EventRecorder
}
192
193func (irecorder *innerEventRecorder) shouldRecordEvent(object runtime.Object) (*v1.ObjectReference, bool) {
194	if ref, ok := object.(*v1.ObjectReference); ok {
195		// this check is needed AFTER the cast. See https://github.com/kubernetes/kubernetes/issues/95552
196		if ref == nil {
197			return nil, false
198		}
199		if !strings.HasPrefix(ref.FieldPath, ImplicitContainerPrefix) {
200			return ref, true
201		}
202	}
203	return nil, false
204}
205
206func (irecorder *innerEventRecorder) Event(object runtime.Object, eventtype, reason, message string) {
207	if ref, ok := irecorder.shouldRecordEvent(object); ok {
208		irecorder.recorder.Event(ref, eventtype, reason, message)
209	}
210}
211
212func (irecorder *innerEventRecorder) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) {
213	if ref, ok := irecorder.shouldRecordEvent(object); ok {
214		irecorder.recorder.Eventf(ref, eventtype, reason, messageFmt, args...)
215	}
216
217}
218
219func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
220	if ref, ok := irecorder.shouldRecordEvent(object); ok {
221		irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...)
222	}
223
224}
225
226// IsHostNetworkPod returns whether the host networking requested for the given Pod.
227// Pod must not be nil.
228func IsHostNetworkPod(pod *v1.Pod) bool {
229	return pod.Spec.HostNetwork
230}
231
232// ConvertPodStatusToRunningPod returns Pod given PodStatus and container runtime string.
233// TODO(random-liu): Convert PodStatus to running Pod, should be deprecated soon
234func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod {
235	runningPod := Pod{
236		ID:        podStatus.ID,
237		Name:      podStatus.Name,
238		Namespace: podStatus.Namespace,
239	}
240	for _, containerStatus := range podStatus.ContainerStatuses {
241		if containerStatus.State != ContainerStateRunning {
242			continue
243		}
244		container := &Container{
245			ID:      containerStatus.ID,
246			Name:    containerStatus.Name,
247			Image:   containerStatus.Image,
248			ImageID: containerStatus.ImageID,
249			Hash:    containerStatus.Hash,
250			State:   containerStatus.State,
251		}
252		runningPod.Containers = append(runningPod.Containers, container)
253	}
254
255	// Populate sandboxes in kubecontainer.Pod
256	for _, sandbox := range podStatus.SandboxStatuses {
257		runningPod.Sandboxes = append(runningPod.Sandboxes, &Container{
258			ID:    ContainerID{Type: runtimeName, ID: sandbox.Id},
259			State: SandboxToContainerState(sandbox.State),
260		})
261	}
262	return runningPod
263}
264
265// SandboxToContainerState converts runtimeapi.PodSandboxState to
266// kubecontainer.State.
267// This is only needed because we need to return sandboxes as if they were
268// kubecontainer.Containers to avoid substantial changes to PLEG.
269// TODO: Remove this once it becomes obsolete.
270func SandboxToContainerState(state runtimeapi.PodSandboxState) State {
271	switch state {
272	case runtimeapi.PodSandboxState_SANDBOX_READY:
273		return ContainerStateRunning
274	case runtimeapi.PodSandboxState_SANDBOX_NOTREADY:
275		return ContainerStateExited
276	}
277	return ContainerStateUnknown
278}
279
280// FormatPod returns a string representing a pod in a human readable format,
281// with pod UID as part of the string.
282func FormatPod(pod *Pod) string {
283	// Use underscore as the delimiter because it is not allowed in pod name
284	// (DNS subdomain format), while allowed in the container name format.
285	return fmt.Sprintf("%s_%s(%s)", pod.Name, pod.Namespace, pod.ID)
286}
287
288// GetContainerSpec gets the container spec by containerName.
289func GetContainerSpec(pod *v1.Pod, containerName string) *v1.Container {
290	var containerSpec *v1.Container
291	podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool {
292		if containerName == c.Name {
293			containerSpec = c
294			return false
295		}
296		return true
297	})
298	return containerSpec
299}
300
301// HasPrivilegedContainer returns true if any of the containers in the pod are privileged.
302func HasPrivilegedContainer(pod *v1.Pod) bool {
303	var hasPrivileged bool
304	podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool {
305		if c.SecurityContext != nil && c.SecurityContext.Privileged != nil && *c.SecurityContext.Privileged {
306			hasPrivileged = true
307			return false
308		}
309		return true
310	})
311	return hasPrivileged
312}
313
314// HasWindowsHostProcessContainer returns true if any of the containers in a pod are HostProcess containers.
315func HasWindowsHostProcessContainer(pod *v1.Pod) bool {
316	var hasHostProcess bool
317	podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool {
318		if sc.HasWindowsHostProcessRequest(pod, c) {
319			hasHostProcess = true
320			return false
321		}
322		return true
323	})
324
325	return hasHostProcess
326}
327
328// AllContainersAreWindowsHostProcess returns true if all containres in a pod are HostProcess containers.
329func AllContainersAreWindowsHostProcess(pod *v1.Pod) bool {
330	allHostProcess := true
331	podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool {
332		if !sc.HasWindowsHostProcessRequest(pod, c) {
333			allHostProcess = false
334			return false
335		}
336		return true
337	})
338
339	return allHostProcess
340}
341
342// MakePortMappings creates internal port mapping from api port mapping.
343func MakePortMappings(container *v1.Container) (ports []PortMapping) {
344	names := make(map[string]struct{})
345	for _, p := range container.Ports {
346		pm := PortMapping{
347			HostPort:      int(p.HostPort),
348			ContainerPort: int(p.ContainerPort),
349			Protocol:      p.Protocol,
350			HostIP:        p.HostIP,
351		}
352
353		// We need to determine the address family this entry applies to. We do this to ensure
354		// duplicate containerPort / protocol rules work across different address families.
355		// https://github.com/kubernetes/kubernetes/issues/82373
356		family := "any"
357		if p.HostIP != "" {
358			if utilsnet.IsIPv6String(p.HostIP) {
359				family = "v6"
360			} else {
361				family = "v4"
362			}
363		}
364
365		var name string = p.Name
366		if name == "" {
367			name = fmt.Sprintf("%s-%s-%s:%d:%d", family, p.Protocol, p.HostIP, p.ContainerPort, p.HostPort)
368		}
369
370		// Protect against a port name being used more than once in a container.
371		if _, ok := names[name]; ok {
372			klog.InfoS("Port name conflicted, it is defined more than once", "portName", name)
373			continue
374		}
375		ports = append(ports, pm)
376		names[name] = struct{}{}
377	}
378	return
379}
380