1/* 2Copyright 2015 The Kubernetes Authors. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package container 18 19import ( 20 "encoding/json" 21 "fmt" 22 "hash/fnv" 23 "strings" 24 25 "k8s.io/klog/v2" 26 27 v1 "k8s.io/api/core/v1" 28 "k8s.io/apimachinery/pkg/runtime" 29 "k8s.io/apimachinery/pkg/types" 30 "k8s.io/apimachinery/pkg/util/sets" 31 "k8s.io/client-go/tools/record" 32 runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" 33 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 34 sc "k8s.io/kubernetes/pkg/securitycontext" 35 hashutil "k8s.io/kubernetes/pkg/util/hash" 36 "k8s.io/kubernetes/third_party/forked/golang/expansion" 37 utilsnet "k8s.io/utils/net" 38) 39 40// HandlerRunner runs a lifecycle handler for a container. 41type HandlerRunner interface { 42 Run(containerID ContainerID, pod *v1.Pod, container *v1.Container, handler *v1.Handler) (string, error) 43} 44 45// RuntimeHelper wraps kubelet to make container runtime 46// able to get necessary informations like the RunContainerOptions, DNS settings, Host IP. 47type RuntimeHelper interface { 48 GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (contOpts *RunContainerOptions, cleanupAction func(), err error) 49 GetPodDNS(pod *v1.Pod) (dnsConfig *runtimeapi.DNSConfig, err error) 50 // GetPodCgroupParent returns the CgroupName identifier, and its literal cgroupfs form on the host 51 // of a pod. 52 GetPodCgroupParent(pod *v1.Pod) string 53 GetPodDir(podUID types.UID) string 54 GeneratePodHostNameAndDomain(pod *v1.Pod) (hostname string, hostDomain string, err error) 55 // GetExtraSupplementalGroupsForPod returns a list of the extra 56 // supplemental groups for the Pod. These extra supplemental groups come 57 // from annotations on persistent volumes that the pod depends on. 58 GetExtraSupplementalGroupsForPod(pod *v1.Pod) []int64 59} 60 61// ShouldContainerBeRestarted checks whether a container needs to be restarted. 62// TODO(yifan): Think about how to refactor this. 63func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool { 64 // Once a pod has been marked deleted, it should not be restarted 65 if pod.DeletionTimestamp != nil { 66 return false 67 } 68 // Get latest container status. 69 status := podStatus.FindContainerStatusByName(container.Name) 70 // If the container was never started before, we should start it. 71 // NOTE(random-liu): If all historical containers were GC'd, we'll also return true here. 72 if status == nil { 73 return true 74 } 75 // Check whether container is running 76 if status.State == ContainerStateRunning { 77 return false 78 } 79 // Always restart container in the unknown, or in the created state. 80 if status.State == ContainerStateUnknown || status.State == ContainerStateCreated { 81 return true 82 } 83 // Check RestartPolicy for dead container 84 if pod.Spec.RestartPolicy == v1.RestartPolicyNever { 85 klog.V(4).InfoS("Already ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name) 86 return false 87 } 88 if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure { 89 // Check the exit code. 90 if status.ExitCode == 0 { 91 klog.V(4).InfoS("Already successfully ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name) 92 return false 93 } 94 } 95 return true 96} 97 98// HashContainer returns the hash of the container. It is used to compare 99// the running container with its desired spec. 100// Note: remember to update hashValues in container_hash_test.go as well. 101func HashContainer(container *v1.Container) uint64 { 102 hash := fnv.New32a() 103 // Omit nil or empty field when calculating hash value 104 // Please see https://github.com/kubernetes/kubernetes/issues/53644 105 containerJSON, _ := json.Marshal(container) 106 hashutil.DeepHashObject(hash, containerJSON) 107 return uint64(hash.Sum32()) 108} 109 110// envVarsToMap constructs a map of environment name to value from a slice 111// of env vars. 112func envVarsToMap(envs []EnvVar) map[string]string { 113 result := map[string]string{} 114 for _, env := range envs { 115 result[env.Name] = env.Value 116 } 117 return result 118} 119 120// v1EnvVarsToMap constructs a map of environment name to value from a slice 121// of env vars. 122func v1EnvVarsToMap(envs []v1.EnvVar) map[string]string { 123 result := map[string]string{} 124 for _, env := range envs { 125 result[env.Name] = env.Value 126 } 127 128 return result 129} 130 131// ExpandContainerCommandOnlyStatic substitutes only static environment variable values from the 132// container environment definitions. This does *not* include valueFrom substitutions. 133// TODO: callers should use ExpandContainerCommandAndArgs with a fully resolved list of environment. 134func ExpandContainerCommandOnlyStatic(containerCommand []string, envs []v1.EnvVar) (command []string) { 135 mapping := expansion.MappingFuncFor(v1EnvVarsToMap(envs)) 136 if len(containerCommand) != 0 { 137 for _, cmd := range containerCommand { 138 command = append(command, expansion.Expand(cmd, mapping)) 139 } 140 } 141 return command 142} 143 144// ExpandContainerVolumeMounts expands the subpath of the given VolumeMount by replacing variable references with the values of given EnvVar. 145func ExpandContainerVolumeMounts(mount v1.VolumeMount, envs []EnvVar) (string, error) { 146 147 envmap := envVarsToMap(envs) 148 missingKeys := sets.NewString() 149 expanded := expansion.Expand(mount.SubPathExpr, func(key string) string { 150 value, ok := envmap[key] 151 if !ok || len(value) == 0 { 152 missingKeys.Insert(key) 153 } 154 return value 155 }) 156 157 if len(missingKeys) > 0 { 158 return "", fmt.Errorf("missing value for %s", strings.Join(missingKeys.List(), ", ")) 159 } 160 return expanded, nil 161} 162 163// ExpandContainerCommandAndArgs expands the given Container's command by replacing variable references `with the values of given EnvVar. 164func ExpandContainerCommandAndArgs(container *v1.Container, envs []EnvVar) (command []string, args []string) { 165 mapping := expansion.MappingFuncFor(envVarsToMap(envs)) 166 167 if len(container.Command) != 0 { 168 for _, cmd := range container.Command { 169 command = append(command, expansion.Expand(cmd, mapping)) 170 } 171 } 172 173 if len(container.Args) != 0 { 174 for _, arg := range container.Args { 175 args = append(args, expansion.Expand(arg, mapping)) 176 } 177 } 178 179 return command, args 180} 181 182// FilterEventRecorder creates an event recorder to record object's event except implicitly required container's, like infra container. 183func FilterEventRecorder(recorder record.EventRecorder) record.EventRecorder { 184 return &innerEventRecorder{ 185 recorder: recorder, 186 } 187} 188 189type innerEventRecorder struct { 190 recorder record.EventRecorder 191} 192 193func (irecorder *innerEventRecorder) shouldRecordEvent(object runtime.Object) (*v1.ObjectReference, bool) { 194 if ref, ok := object.(*v1.ObjectReference); ok { 195 // this check is needed AFTER the cast. See https://github.com/kubernetes/kubernetes/issues/95552 196 if ref == nil { 197 return nil, false 198 } 199 if !strings.HasPrefix(ref.FieldPath, ImplicitContainerPrefix) { 200 return ref, true 201 } 202 } 203 return nil, false 204} 205 206func (irecorder *innerEventRecorder) Event(object runtime.Object, eventtype, reason, message string) { 207 if ref, ok := irecorder.shouldRecordEvent(object); ok { 208 irecorder.recorder.Event(ref, eventtype, reason, message) 209 } 210} 211 212func (irecorder *innerEventRecorder) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) { 213 if ref, ok := irecorder.shouldRecordEvent(object); ok { 214 irecorder.recorder.Eventf(ref, eventtype, reason, messageFmt, args...) 215 } 216 217} 218 219func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) { 220 if ref, ok := irecorder.shouldRecordEvent(object); ok { 221 irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...) 222 } 223 224} 225 226// IsHostNetworkPod returns whether the host networking requested for the given Pod. 227// Pod must not be nil. 228func IsHostNetworkPod(pod *v1.Pod) bool { 229 return pod.Spec.HostNetwork 230} 231 232// ConvertPodStatusToRunningPod returns Pod given PodStatus and container runtime string. 233// TODO(random-liu): Convert PodStatus to running Pod, should be deprecated soon 234func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod { 235 runningPod := Pod{ 236 ID: podStatus.ID, 237 Name: podStatus.Name, 238 Namespace: podStatus.Namespace, 239 } 240 for _, containerStatus := range podStatus.ContainerStatuses { 241 if containerStatus.State != ContainerStateRunning { 242 continue 243 } 244 container := &Container{ 245 ID: containerStatus.ID, 246 Name: containerStatus.Name, 247 Image: containerStatus.Image, 248 ImageID: containerStatus.ImageID, 249 Hash: containerStatus.Hash, 250 State: containerStatus.State, 251 } 252 runningPod.Containers = append(runningPod.Containers, container) 253 } 254 255 // Populate sandboxes in kubecontainer.Pod 256 for _, sandbox := range podStatus.SandboxStatuses { 257 runningPod.Sandboxes = append(runningPod.Sandboxes, &Container{ 258 ID: ContainerID{Type: runtimeName, ID: sandbox.Id}, 259 State: SandboxToContainerState(sandbox.State), 260 }) 261 } 262 return runningPod 263} 264 265// SandboxToContainerState converts runtimeapi.PodSandboxState to 266// kubecontainer.State. 267// This is only needed because we need to return sandboxes as if they were 268// kubecontainer.Containers to avoid substantial changes to PLEG. 269// TODO: Remove this once it becomes obsolete. 270func SandboxToContainerState(state runtimeapi.PodSandboxState) State { 271 switch state { 272 case runtimeapi.PodSandboxState_SANDBOX_READY: 273 return ContainerStateRunning 274 case runtimeapi.PodSandboxState_SANDBOX_NOTREADY: 275 return ContainerStateExited 276 } 277 return ContainerStateUnknown 278} 279 280// FormatPod returns a string representing a pod in a human readable format, 281// with pod UID as part of the string. 282func FormatPod(pod *Pod) string { 283 // Use underscore as the delimiter because it is not allowed in pod name 284 // (DNS subdomain format), while allowed in the container name format. 285 return fmt.Sprintf("%s_%s(%s)", pod.Name, pod.Namespace, pod.ID) 286} 287 288// GetContainerSpec gets the container spec by containerName. 289func GetContainerSpec(pod *v1.Pod, containerName string) *v1.Container { 290 var containerSpec *v1.Container 291 podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool { 292 if containerName == c.Name { 293 containerSpec = c 294 return false 295 } 296 return true 297 }) 298 return containerSpec 299} 300 301// HasPrivilegedContainer returns true if any of the containers in the pod are privileged. 302func HasPrivilegedContainer(pod *v1.Pod) bool { 303 var hasPrivileged bool 304 podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool { 305 if c.SecurityContext != nil && c.SecurityContext.Privileged != nil && *c.SecurityContext.Privileged { 306 hasPrivileged = true 307 return false 308 } 309 return true 310 }) 311 return hasPrivileged 312} 313 314// HasWindowsHostProcessContainer returns true if any of the containers in a pod are HostProcess containers. 315func HasWindowsHostProcessContainer(pod *v1.Pod) bool { 316 var hasHostProcess bool 317 podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool { 318 if sc.HasWindowsHostProcessRequest(pod, c) { 319 hasHostProcess = true 320 return false 321 } 322 return true 323 }) 324 325 return hasHostProcess 326} 327 328// AllContainersAreWindowsHostProcess returns true if all containres in a pod are HostProcess containers. 329func AllContainersAreWindowsHostProcess(pod *v1.Pod) bool { 330 allHostProcess := true 331 podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool { 332 if !sc.HasWindowsHostProcessRequest(pod, c) { 333 allHostProcess = false 334 return false 335 } 336 return true 337 }) 338 339 return allHostProcess 340} 341 342// MakePortMappings creates internal port mapping from api port mapping. 343func MakePortMappings(container *v1.Container) (ports []PortMapping) { 344 names := make(map[string]struct{}) 345 for _, p := range container.Ports { 346 pm := PortMapping{ 347 HostPort: int(p.HostPort), 348 ContainerPort: int(p.ContainerPort), 349 Protocol: p.Protocol, 350 HostIP: p.HostIP, 351 } 352 353 // We need to determine the address family this entry applies to. We do this to ensure 354 // duplicate containerPort / protocol rules work across different address families. 355 // https://github.com/kubernetes/kubernetes/issues/82373 356 family := "any" 357 if p.HostIP != "" { 358 if utilsnet.IsIPv6String(p.HostIP) { 359 family = "v6" 360 } else { 361 family = "v4" 362 } 363 } 364 365 var name string = p.Name 366 if name == "" { 367 name = fmt.Sprintf("%s-%s-%s:%d:%d", family, p.Protocol, p.HostIP, p.ContainerPort, p.HostPort) 368 } 369 370 // Protect against a port name being used more than once in a container. 371 if _, ok := names[name]; ok { 372 klog.InfoS("Port name conflicted, it is defined more than once", "portName", name) 373 continue 374 } 375 ports = append(ports, pm) 376 names[name] = struct{}{} 377 } 378 return 379} 380