1/*
2Copyright 2015 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package metrics
18
19import (
20	"fmt"
21	"sync"
22	"time"
23
24	"k8s.io/component-base/metrics"
25	"k8s.io/component-base/metrics/legacyregistry"
26
27	corev1 "k8s.io/api/core/v1"
28	"k8s.io/apimachinery/pkg/types"
29	utilfeature "k8s.io/apiserver/pkg/util/feature"
30	"k8s.io/klog/v2"
31	"k8s.io/kubernetes/pkg/features"
32)
33
// This const block defines the names used by the kubelet metrics: the metric
// subsystem, the metric name keys, the label keys, and a few fixed label values.
const (
	// Subsystem, node label key, and metric name keys for the core kubelet metrics.
	KubeletSubsystem             = "kubelet"
	NodeNameKey                  = "node_name"
	NodeLabelKey                 = "node"
	PodWorkerDurationKey         = "pod_worker_duration_seconds"
	PodStartDurationKey          = "pod_start_duration_seconds"
	CgroupManagerOperationsKey   = "cgroup_manager_duration_seconds"
	PodWorkerStartDurationKey    = "pod_worker_start_duration_seconds"
	PLEGRelistDurationKey        = "pleg_relist_duration_seconds"
	PLEGDiscardEventsKey         = "pleg_discard_events"
	PLEGRelistIntervalKey        = "pleg_relist_interval_seconds"
	PLEGLastSeenKey              = "pleg_last_seen_seconds"
	EvictionsKey                 = "evictions"
	EvictionStatsAgeKey          = "eviction_stats_age_seconds"
	PreemptionsKey               = "preemptions"
	VolumeStatsCapacityBytesKey  = "volume_stats_capacity_bytes"
	VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
	VolumeStatsUsedBytesKey      = "volume_stats_used_bytes"
	VolumeStatsInodesKey         = "volume_stats_inodes"
	VolumeStatsInodesFreeKey     = "volume_stats_inodes_free"
	VolumeStatsInodesUsedKey     = "volume_stats_inodes_used"
	RunningPodsKey               = "running_pods"
	RunningContainersKey         = "running_containers"
	// Metric keys for remote runtime operations.
	RuntimeOperationsKey         = "runtime_operations_total"
	RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
	RuntimeOperationsErrorsKey   = "runtime_operations_errors_total"
	// Metric keys for device plugin operations.
	DevicePluginRegistrationCountKey  = "device_plugin_registration_total"
	DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
	// Metric keys for pod resources endpoint operations.
	PodResourcesEndpointRequestsTotalKey          = "pod_resources_endpoint_requests_total"
	PodResourcesEndpointRequestsListKey           = "pod_resources_endpoint_requests_list"
	PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable"
	PodResourcesEndpointErrorsListKey             = "pod_resources_endpoint_errors_list"
	PodResourcesEndpointErrorsGetAllocatableKey   = "pod_resources_endpoint_errors_get_allocatable"

	// Metric keys, label keys, and the "local" source label value for node config.
	AssignedConfigKey             = "node_config_assigned"
	ActiveConfigKey               = "node_config_active"
	LastKnownGoodConfigKey        = "node_config_last_known_good"
	ConfigErrorKey                = "node_config_error"
	ConfigSourceLabelKey          = "node_config_source"
	ConfigSourceLabelValueLocal   = "local"
	ConfigUIDLabelKey             = "node_config_uid"
	ConfigResourceVersionLabelKey = "node_config_resource_version"
	KubeletConfigKeyLabelKey      = "node_config_kubelet_key"

	// Metric keys for RuntimeClass (pod sandbox) operations.
	RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
	RunPodSandboxErrorsKey   = "run_podsandbox_errors_total"

	// Metric keys that track the total number of Pods and Containers started,
	// and the errors encountered while starting them.
	StartedPodsTotalKey             = "started_pods_total"
	StartedPodsErrorsTotalKey       = "started_pods_errors_total"
	StartedContainersTotalKey       = "started_containers_total"
	StartedContainersErrorsTotalKey = "started_containers_errors_total"

	// Metric key that tracks ephemeral container usage by this kubelet.
	ManagedEphemeralContainersKey = "managed_ephemeral_containers"

	// Values used in metric labels to distinguish container types.
	Container          = "container"
	InitContainer      = "init_container"
	EphemeralContainer = "ephemeral_container"
)
101
// Metric definitions for the kubelet. Every metric uses KubeletSubsystem as
// its subsystem and is declared at the ALPHA stability level.
var (
	// NodeName is a Gauge that tracks the node's name. The count is always 1.
	NodeName = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      KubeletSubsystem,
			Name:           NodeNameKey,
			Help:           "The node's name. The count is always 1.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{NodeLabelKey},
	)
	// ContainersPerPodCount is a Histogram that tracks the number of containers per pod.
	ContainersPerPodCount = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           "containers_per_pod_count",
			Help:           "The number of containers per pod.",
			Buckets:        metrics.ExponentialBuckets(1, 2, 5),
			StabilityLevel: metrics.ALPHA,
		},
	)
	// PodWorkerDuration is a Histogram that tracks the duration (in seconds) it takes to sync a single pod.
	// Broken down by the operation type.
	PodWorkerDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodWorkerDurationKey,
			Help:           "Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"operation_type"},
	)
	// PodStartDuration is a Histogram that tracks the duration (in seconds) it takes for a single pod to go from pending to running.
	PodStartDuration = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodStartDurationKey,
			Help:           "Duration in seconds for a single pod to go from pending to running.",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
	)
	// CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete.
	// Broken down by method (exposed through the "operation_type" label).
	CgroupManagerDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           CgroupManagerOperationsKey,
			Help:           "Duration in seconds for cgroup manager operations. Broken down by method.",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"operation_type"},
	)
	// PodWorkerStartDuration is a Histogram that tracks the duration (in seconds) it takes from seeing a pod to starting a worker.
	PodWorkerStartDuration = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodWorkerStartDurationKey,
			Help:           "Duration in seconds from seeing a pod to starting a worker.",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
	)
	// PLEGRelistDuration is a Histogram that tracks the duration (in seconds) it takes for relisting pods in the Kubelet's
	// Pod Lifecycle Event Generator (PLEG).
	PLEGRelistDuration = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PLEGRelistDurationKey,
			Help:           "Duration in seconds for relisting pods in PLEG.",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
	)
	// PLEGDiscardEvents is a Counter that tracks the number of discarded events in the Kubelet's Pod Lifecycle Event Generator (PLEG).
	PLEGDiscardEvents = metrics.NewCounter(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PLEGDiscardEventsKey,
			Help:           "The number of discard events in PLEG.",
			StabilityLevel: metrics.ALPHA,
		},
	)

	// PLEGRelistInterval is a Histogram that tracks the intervals (in seconds) between relisting in the Kubelet's
	// Pod Lifecycle Event Generator (PLEG).
	PLEGRelistInterval = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PLEGRelistIntervalKey,
			Help:           "Interval in seconds between relisting in PLEG.",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
	)
	// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
	// Pod Lifecycle Event Generator (PLEG) was last seen active.
	PLEGLastSeen = metrics.NewGauge(
		&metrics.GaugeOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PLEGLastSeenKey,
			Help:           "Timestamp in seconds when PLEG was last seen active.",
			StabilityLevel: metrics.ALPHA,
		},
	)
	// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
	// Broken down by operation type.
	RuntimeOperations = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           RuntimeOperationsKey,
			Help:           "Cumulative number of runtime operations by operation type.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"operation_type"},
	)
	// RuntimeOperationsDuration is a Histogram that tracks the duration (in seconds) for remote runtime operations to complete.
	// Broken down by operation type.
	RuntimeOperationsDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           RuntimeOperationsDurationKey,
			Help:           "Duration in seconds of runtime operations. Broken down by operation type.",
			Buckets:        metrics.ExponentialBuckets(.005, 2.5, 14),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"operation_type"},
	)
	// RuntimeOperationsErrors is a Counter that tracks the cumulative number of remote runtime operations errors.
	// Broken down by operation type.
	RuntimeOperationsErrors = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           RuntimeOperationsErrorsKey,
			Help:           "Cumulative number of runtime operation errors by operation type.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"operation_type"},
	)
	// Evictions is a Counter that tracks the cumulative number of pod evictions initiated by the kubelet.
	// Broken down by eviction signal.
	Evictions = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           EvictionsKey,
			Help:           "Cumulative number of pod evictions by eviction signal",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"eviction_signal"},
	)
	// EvictionStatsAge is a Histogram that tracks the time (in seconds) between when stats are collected and when a pod is evicted
	// based on those stats. Broken down by eviction signal.
	EvictionStatsAge = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           EvictionStatsAgeKey,
			Help:           "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"eviction_signal"},
	)
	// Preemptions is a Counter that tracks the cumulative number of pod preemptions initiated by the kubelet.
	// Broken down by preemption signal. A preemption is only recorded for one resource, so the sum of all signals
	// is the number of preemptions on the given node.
	Preemptions = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PreemptionsKey,
			Help:           "Cumulative number of pod preemptions by preemption resource",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"preemption_signal"},
	)
	// DevicePluginRegistrationCount is a Counter that tracks the cumulative number of device plugin registrations.
	// Broken down by resource name.
	DevicePluginRegistrationCount = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           DevicePluginRegistrationCountKey,
			Help:           "Cumulative number of device plugin registrations. Broken down by resource name.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"resource_name"},
	)
	// DevicePluginAllocationDuration is a Histogram that tracks the duration (in seconds) to serve a device plugin allocation request.
	// Broken down by resource name.
	DevicePluginAllocationDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      KubeletSubsystem,
			Name:           DevicePluginAllocationDurationKey,
			Help:           "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.",
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"resource_name"},
	)

	// PodResourcesEndpointRequestsTotalCount is a Counter that tracks the cumulative number of requests to the PodResource endpoints.
	// Broken down by server API version.
	PodResourcesEndpointRequestsTotalCount = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodResourcesEndpointRequestsTotalKey,
			Help:           "Cumulative number of requests to the PodResource endpoint. Broken down by server api version.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"server_api_version"},
	)

	// PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResource List() endpoint.
	// Broken down by server API version.
	PodResourcesEndpointRequestsListCount = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodResourcesEndpointRequestsListKey,
			Help:           "Number of requests to the PodResource List endpoint. Broken down by server api version.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"server_api_version"},
	)

	// PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResource GetAllocatableResources() endpoint.
	// Broken down by server API version.
	PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodResourcesEndpointRequestsGetAllocatableKey,
			Help:           "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"server_api_version"},
	)

	// PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by the PodResource List() endpoint.
	// Broken down by server API version.
	PodResourcesEndpointErrorsListCount = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodResourcesEndpointErrorsListKey,
			Help:           "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"server_api_version"},
	)

	// PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResource GetAllocatableResources() endpoint.
	// Broken down by server API version.
	PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           PodResourcesEndpointErrorsGetAllocatableKey,
			Help:           "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"server_api_version"},
	)

	// Metrics for node config. All four carry DeprecatedVersion "1.22.0".

	// AssignedConfig is a Gauge that is set to 1 if the Kubelet has a NodeConfig assigned.
	AssignedConfig = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:         KubeletSubsystem,
			Name:              AssignedConfigKey,
			Help:              "The node's understanding of intended config. The count is always 1.",
			DeprecatedVersion: "1.22.0",
			StabilityLevel:    metrics.ALPHA,
		},
		[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
	)
	// ActiveConfig is a Gauge that is set to 1 if the Kubelet has an active NodeConfig.
	ActiveConfig = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:         KubeletSubsystem,
			Name:              ActiveConfigKey,
			Help:              "The config source the node is actively using. The count is always 1.",
			DeprecatedVersion: "1.22.0",
			StabilityLevel:    metrics.ALPHA,
		},
		[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
	)
	// LastKnownGoodConfig is a Gauge that is set to 1 if the Kubelet has a NodeConfig it can fall back to if there
	// are certain errors.
	LastKnownGoodConfig = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:         KubeletSubsystem,
			Name:              LastKnownGoodConfigKey,
			Help:              "The config source the node will fall back to when it encounters certain errors. The count is always 1.",
			DeprecatedVersion: "1.22.0",
			StabilityLevel:    metrics.ALPHA,
		},
		[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
	)
	// ConfigError is a Gauge that is set to 1 if the node is experiencing a configuration-related error, and 0 otherwise.
	ConfigError = metrics.NewGauge(
		&metrics.GaugeOpts{
			Subsystem:         KubeletSubsystem,
			Name:              ConfigErrorKey,
			Help:              "This metric is true (1) if the node is experiencing a configuration-related error, false (0) otherwise.",
			DeprecatedVersion: "1.22.0",
			StabilityLevel:    metrics.ALPHA,
		},
	)
	// RunPodSandboxDuration is a Histogram that tracks the duration (in seconds) it takes to run Pod Sandbox operations.
	// Broken down by RuntimeClass.Handler.
	RunPodSandboxDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: KubeletSubsystem,
			Name:      RunPodSandboxDurationKey,
			Help:      "Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler.",
			// Use DefBuckets for now, will customize the buckets if necessary.
			Buckets:        metrics.DefBuckets,
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"runtime_handler"},
	)
	// RunPodSandboxErrors is a Counter that tracks the cumulative number of Pod Sandbox operations errors.
	// Broken down by RuntimeClass.Handler.
	RunPodSandboxErrors = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           RunPodSandboxErrorsKey,
			Help:           "Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"runtime_handler"},
	)

	// RunningPodCount is a Gauge that tracks the number of Pods that currently have a running sandbox.
	// It is used to expose the kubelet internal state: how many pods have running containers in the container runtime, and mainly for debugging purpose.
	RunningPodCount = metrics.NewGauge(
		&metrics.GaugeOpts{
			Subsystem:      KubeletSubsystem,
			Name:           RunningPodsKey,
			Help:           "Number of pods that have a running pod sandbox",
			StabilityLevel: metrics.ALPHA,
		},
	)
	// RunningContainerCount is a Gauge that tracks the number of containers currently running.
	// Broken down by container state.
	RunningContainerCount = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      KubeletSubsystem,
			Name:           RunningContainersKey,
			Help:           "Number of containers currently running",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"container_state"},
	)
	// StartedPodsTotal is a Counter that tracks pod sandbox creation operations.
	StartedPodsTotal = metrics.NewCounter(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           StartedPodsTotalKey,
			Help:           "Cumulative number of pods started",
			StabilityLevel: metrics.ALPHA,
		},
	)
	// StartedPodsErrorsTotal is a Counter that tracks the number of errors creating pod sandboxes.
	// Broken down by the "message" label.
	StartedPodsErrorsTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           StartedPodsErrorsTotalKey,
			Help:           "Cumulative number of errors when starting pods",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"message"},
	)
	// StartedContainersTotal is a Counter that tracks the number of container creation operations.
	// Broken down by container type.
	StartedContainersTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           StartedContainersTotalKey,
			Help:           "Cumulative number of containers started",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"container_type"},
	)
	// StartedContainersErrorsTotal is a Counter that tracks the number of errors creating containers.
	// Broken down by container type and error code.
	StartedContainersErrorsTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           StartedContainersErrorsTotalKey,
			Help:           "Cumulative number of errors when starting containers",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"container_type", "code"},
	)
	// ManagedEphemeralContainers is a Gauge that indicates how many ephemeral containers are managed by this kubelet.
	ManagedEphemeralContainers = metrics.NewGauge(
		&metrics.GaugeOpts{
			Subsystem:      KubeletSubsystem,
			Name:           ManagedEphemeralContainersKey,
			Help:           "Current number of ephemeral containers in pods managed by this kubelet. Ephemeral containers will be ignored if disabled by the EphemeralContainers feature gate, and this number will be 0.",
			StabilityLevel: metrics.ALPHA,
		},
	)
)
502
503var registerMetrics sync.Once
504
505// Register registers all metrics.
506func Register(collectors ...metrics.StableCollector) {
507	// Register the metrics.
508	registerMetrics.Do(func() {
509		legacyregistry.MustRegister(NodeName)
510		legacyregistry.MustRegister(PodWorkerDuration)
511		legacyregistry.MustRegister(PodStartDuration)
512		legacyregistry.MustRegister(CgroupManagerDuration)
513		legacyregistry.MustRegister(PodWorkerStartDuration)
514		legacyregistry.MustRegister(ContainersPerPodCount)
515		legacyregistry.MustRegister(PLEGRelistDuration)
516		legacyregistry.MustRegister(PLEGDiscardEvents)
517		legacyregistry.MustRegister(PLEGRelistInterval)
518		legacyregistry.MustRegister(PLEGLastSeen)
519		legacyregistry.MustRegister(RuntimeOperations)
520		legacyregistry.MustRegister(RuntimeOperationsDuration)
521		legacyregistry.MustRegister(RuntimeOperationsErrors)
522		legacyregistry.MustRegister(Evictions)
523		legacyregistry.MustRegister(EvictionStatsAge)
524		legacyregistry.MustRegister(Preemptions)
525		legacyregistry.MustRegister(DevicePluginRegistrationCount)
526		legacyregistry.MustRegister(DevicePluginAllocationDuration)
527		legacyregistry.MustRegister(RunningContainerCount)
528		legacyregistry.MustRegister(RunningPodCount)
529		legacyregistry.MustRegister(ManagedEphemeralContainers)
530		legacyregistry.MustRegister(StartedPodsTotal)
531		legacyregistry.MustRegister(StartedPodsErrorsTotal)
532		legacyregistry.MustRegister(StartedContainersTotal)
533		legacyregistry.MustRegister(StartedContainersErrorsTotal)
534		legacyregistry.MustRegister(RunPodSandboxDuration)
535		legacyregistry.MustRegister(RunPodSandboxErrors)
536		if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
537			legacyregistry.MustRegister(AssignedConfig)
538			legacyregistry.MustRegister(ActiveConfig)
539			legacyregistry.MustRegister(LastKnownGoodConfig)
540			legacyregistry.MustRegister(ConfigError)
541		}
542		for _, collector := range collectors {
543			legacyregistry.CustomMustRegister(collector)
544		}
545	})
546}
547
548// GetGather returns the gatherer. It used by test case outside current package.
549func GetGather() metrics.Gatherer {
550	return legacyregistry.DefaultGatherer
551}
552
// SinceInSeconds returns the time elapsed since start as a floating-point
// number of seconds. The result is negative when start lies in the future.
func SinceInSeconds(start time.Time) float64 {
	elapsed := time.Since(start)
	return elapsed.Seconds()
}
557
558const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
559
560func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
561	if source == nil {
562		return map[string]string{
563			// prometheus requires all of the labels that can be set on the metric
564			ConfigSourceLabelKey:          "local",
565			ConfigUIDLabelKey:             "",
566			ConfigResourceVersionLabelKey: "",
567			KubeletConfigKeyLabelKey:      "",
568		}, nil
569	}
570	if source.ConfigMap != nil {
571		return map[string]string{
572			ConfigSourceLabelKey:          fmt.Sprintf(configMapAPIPathFmt, source.ConfigMap.Namespace, source.ConfigMap.Name),
573			ConfigUIDLabelKey:             string(source.ConfigMap.UID),
574			ConfigResourceVersionLabelKey: source.ConfigMap.ResourceVersion,
575			KubeletConfigKeyLabelKey:      source.ConfigMap.KubeletConfigKey,
576		}, nil
577	}
578	return nil, fmt.Errorf("unrecognized config source type, all source subfields were nil")
579}
580
581// track labels across metric updates, so we can delete old label sets and prevent leaks
582var assignedConfigLabels map[string]string
583
584// SetAssignedConfig tracks labels according to the assigned NodeConfig. It also tracks labels
585// across metric updates so old labels can be safely deleted.
586func SetAssignedConfig(source *corev1.NodeConfigSource) error {
587	// compute the timeseries labels from the source
588	labels, err := configLabels(source)
589	if err != nil {
590		return err
591	}
592	// clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
593	if !AssignedConfig.Delete(assignedConfigLabels) {
594		klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different assigned configs.", "labels", assignedConfigLabels)
595	}
596	// record the new timeseries
597	assignedConfigLabels = labels
598	// expose the new timeseries with a constant count of 1
599	AssignedConfig.With(assignedConfigLabels).Set(1)
600	return nil
601}
602
603// track labels across metric updates, so we can delete old label sets and prevent leaks
604var activeConfigLabels map[string]string
605
606// SetActiveConfig tracks labels according to the NodeConfig that is currently used by the Kubelet.
607// It also tracks labels across metric updates so old labels can be safely deleted.
608func SetActiveConfig(source *corev1.NodeConfigSource) error {
609	// compute the timeseries labels from the source
610	labels, err := configLabels(source)
611	if err != nil {
612		return err
613	}
614	// clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
615	if !ActiveConfig.Delete(activeConfigLabels) {
616		klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different active configs.", "labels", activeConfigLabels)
617	}
618	// record the new timeseries
619	activeConfigLabels = labels
620	// expose the new timeseries with a constant count of 1
621	ActiveConfig.With(activeConfigLabels).Set(1)
622	return nil
623}
624
625// track labels across metric updates, so we can delete old label sets and prevent leaks
626var lastKnownGoodConfigLabels map[string]string
627
628// SetLastKnownGoodConfig tracks labels according to the NodeConfig that was successfully applied last.
629// It also tracks labels across metric updates so old labels can be safely deleted.
630func SetLastKnownGoodConfig(source *corev1.NodeConfigSource) error {
631	// compute the timeseries labels from the source
632	labels, err := configLabels(source)
633	if err != nil {
634		return err
635	}
636	// clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
637	if !LastKnownGoodConfig.Delete(lastKnownGoodConfigLabels) {
638		klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different last known good configs.", "labels", lastKnownGoodConfigLabels)
639	}
640	// record the new timeseries
641	lastKnownGoodConfigLabels = labels
642	// expose the new timeseries with a constant count of 1
643	LastKnownGoodConfig.With(lastKnownGoodConfigLabels).Set(1)
644	return nil
645}
646
647// SetConfigError sets a the ConfigError metric to 1 in case any errors were encountered.
648func SetConfigError(err bool) {
649	if err {
650		ConfigError.Set(1)
651	} else {
652		ConfigError.Set(0)
653	}
654}
655
656// SetNodeName sets the NodeName Gauge to 1.
657func SetNodeName(name types.NodeName) {
658	NodeName.WithLabelValues(string(name)).Set(1)
659}
660