/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

// This file tests the Taint feature.

import (
	"context"
	"fmt"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	restclient "k8s.io/client-go/rest"
	"k8s.io/kubernetes/pkg/controller/nodelifecycle"
	"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
	pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
	testutils "k8s.io/kubernetes/test/integration/util"
)

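// newPod returns a single-container busybox pod in the given namespace with the
// given resource requests and limits. Nil requests and limits yield a BestEffort
// pod; requests without limits, Burstable; requests equal to limits, Guaranteed.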
func newPod(nsName, name string, req, limit v1.ResourceList) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: nsName,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "busybox",
					Image: "busybox",
					Resources: v1.ResourceRequirements{
						Requests: req,
						Limits:   limit,
					},
				},
			},
		},
	}
}

// TestTaintNodeByCondition verifies that nodes are tainted according to their
// conditions and that only pods tolerating those taints can be scheduled onto them.
func TestTaintNodeByCondition(t *testing.T) {
	// Build the PodTolerationRestriction admission plugin.
	admission := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})

	testCtx := testutils.InitTestAPIServer(t, "default", admission)

	// Build clientset and informers for controllers.
	externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
		QPS:           -1,
		Host:          testCtx.HTTPServer.URL,
		ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
	externalInformers := informers.NewSharedInformerFactory(externalClientset, 0)

	admission.SetExternalKubeClientSet(externalClientset)
	admission.SetExternalKubeInformerFactory(externalInformers)

	testCtx = testutils.InitTestScheduler(t, testCtx, nil)
	defer testutils.CleanupTest(t, testCtx)

	cs := testCtx.ClientSet
	nsName := testCtx.NS.Name

	// Create the NodeLifecycleController, which taints nodes based on their conditions.
	nc, err := nodelifecycle.NewNodeLifecycleController(
		externalInformers.Coordination().V1().Leases(),
		externalInformers.Core().V1().Pods(),
		externalInformers.Core().V1().Nodes(),
		externalInformers.Apps().V1().DaemonSets(),
		cs,
		time.Hour,   // Node monitor grace period
		time.Second, // Node startup grace period
		time.Second, // Node monitor period
		time.Second, // Pod eviction timeout
		100,         // Eviction limiter QPS
		100,         // Secondary eviction limiter QPS
		100,         // Large cluster threshold
		100,         // Unhealthy zone threshold
		true,        // Run taint manager
	)
	if err != nil {
		t.Fatalf("Failed to create node controller: %v", err)
	}

	// Start the informers and wait for their caches to sync.
	externalInformers.Start(testCtx.Ctx.Done())
	externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
	testutils.SyncInformerFactory(testCtx)

	// Run all controllers
	go nc.Run(testCtx.Ctx.Done())
	go testCtx.Scheduler.Run(testCtx.Ctx)

	// -------------------------------------------
	// Test TaintNodeByCondition feature.
	// -------------------------------------------
	nodeRes := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("4000m"),
		v1.ResourceMemory: resource.MustParse("16Gi"),
		v1.ResourcePods:   resource.MustParse("110"),
	}

	podRes := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("100m"),
		v1.ResourceMemory: resource.MustParse("100Mi"),
	}

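	// Tolerations matching each node-condition taint. Each uses the Exists
	// operator, so it matches the taint key regardless of value.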
	notReadyToleration := v1.Toleration{
		Key:      v1.TaintNodeNotReady,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	unschedulableToleration := v1.Toleration{
		Key:      v1.TaintNodeUnschedulable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	memoryPressureToleration := v1.Toleration{
		Key:      v1.TaintNodeMemoryPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	diskPressureToleration := v1.Toleration{
		Key:      v1.TaintNodeDiskPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	networkUnavailableToleration := v1.Toleration{
		Key:      v1.TaintNodeNetworkUnavailable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	pidPressureToleration := v1.Toleration{
		Key:      v1.TaintNodePIDPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

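	// One pod per QoS class: BestEffort (no requests or limits), Burstable
	// (requests only), and Guaranteed (requests equal to limits).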
	bestEffortPod := newPod(nsName, "besteffort-pod", nil, nil)
	burstablePod := newPod(nsName, "burstable-pod", podRes, nil)
	guaranteePod := newPod(nsName, "guarantee-pod", podRes, podRes)

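	// podCase describes a pod to create, the tolerations to attach to it, and
	// whether the pod is expected to fit on the tainted node.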
	type podCase struct {
		pod         *v1.Pod
		tolerations []v1.Toleration
		fits        bool
	}

	// Table-driven test cases.
	tests := []struct {
		name           string
		existingTaints []v1.Taint
		nodeConditions []v1.NodeCondition
		unschedulable  bool
		expectedTaints []v1.Taint
		pods           []podCase
	}{
		{
			name: "not-ready node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionFalse,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeNotReady,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{notReadyToleration},
					fits:        true,
				},
			},
		},
		{
			name:          "unschedulable node",
			unschedulable: true, // node.spec.unschedulable = true
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeUnschedulable,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{unschedulableToleration},
					fits:        true,
				},
			},
		},
		{
			name: "memory pressure node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeMemoryPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			// Under the MemoryPressure condition, Burstable and Guaranteed pods are
			// still scheduled; a BestEffort pod fits only with a matching toleration.
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{memoryPressureToleration},
					fits:        true,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{diskPressureToleration},
					fits:        false,
				},
				{
					pod:  burstablePod,
					fits: true,
				},
				{
					pod:  guaranteePod,
					fits: true,
				},
			},
		},
		{
			name: "disk pressure node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeDiskPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeDiskPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			// Under the DiskPressure condition, only pods with a matching toleration can be scheduled.
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{diskPressureToleration},
					fits:        true,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{memoryPressureToleration},
					fits:        false,
				},
			},
		},
		{
			name: "network unavailable and node is ready",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeNetworkUnavailable,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeNetworkUnavailable,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod: burstablePod,
					tolerations: []v1.Toleration{
						networkUnavailableToleration,
					},
					fits: true,
				},
			},
		},
		{
			name: "network unavailable and node is not ready",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeNetworkUnavailable,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionFalse,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeNetworkUnavailable,
					Effect: v1.TaintEffectNoSchedule,
				},
				{
					Key:    v1.TaintNodeNotReady,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod: burstablePod,
					tolerations: []v1.Toleration{
						networkUnavailableToleration,
					},
					fits: false,
				},
				{
					pod: burstablePod,
					tolerations: []v1.Toleration{
						networkUnavailableToleration,
						notReadyToleration,
					},
					fits: true,
				},
			},
		},
		{
			name: "pid pressure node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodePIDPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodePIDPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{pidPressureToleration},
					fits:        true,
				},
			},
		},
		{
			name: "multi taints on node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodePIDPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeDiskPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeDiskPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
				{
					Key:    v1.TaintNodeMemoryPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
				{
					Key:    v1.TaintNodePIDPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
		},
	}

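	// For each case: create the node, wait for the node lifecycle controller to
	// apply the expected taints, then verify that each pod is scheduled or
	// rejected as expected.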
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			node := &v1.Node{
				ObjectMeta: metav1.ObjectMeta{
					Name: "node-1",
				},
				Spec: v1.NodeSpec{
					Unschedulable: test.unschedulable,
					Taints:        test.existingTaints,
				},
				Status: v1.NodeStatus{
					Capacity:    nodeRes,
					Allocatable: nodeRes,
					Conditions:  test.nodeConditions,
				},
			}

			if _, err := cs.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}); err != nil {
				t.Fatalf("Failed to create node, err: %v", err)
			}
			if err := testutils.WaitForNodeTaints(cs, node, test.expectedTaints); err != nil {
				updatedNode, getErr := cs.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
				if getErr != nil {
					t.Fatalf("Failed to get node <%s>: %v", node.Name, getErr)
				}
				t.Errorf("Failed to taint node <%s>, expected: %v, got: %v, err: %v",
					updatedNode.Name, test.expectedTaints, updatedNode.Spec.Taints, err)
			}

			var pods []*v1.Pod
			for i, p := range test.pods {
				pod := p.pod.DeepCopy()
				pod.Name = fmt.Sprintf("%s-%d", pod.Name, i)
				pod.Spec.Tolerations = p.tolerations

				createdPod, err := cs.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
				if err != nil {
					t.Fatalf("Failed to create pod %s/%s, error: %v",
						pod.Namespace, pod.Name, err)
				}

				pods = append(pods, createdPod)

				if p.fits {
					if err := testutils.WaitForPodToSchedule(cs, createdPod); err != nil {
						t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
							pod.Namespace, pod.Name, err)
					}
				} else {
					if err := waitForPodUnschedulable(cs, createdPod); err != nil {
						t.Errorf("Unschedulable pod %s/%s gets scheduled on the node, err: %v",
							pod.Namespace, pod.Name, err)
					}
				}
			}

			testutils.CleanupPods(cs, t, pods)
			testutils.CleanupNodes(cs, t)
			testutils.WaitForSchedulerCacheCleanup(testCtx.Scheduler, t)
		})
	}
}