Replace godep with dep

This commit is contained in:
Manuel de Brito Fontes 2017-10-06 17:26:14 -03:00
parent 1e7489927c
commit bf5616c65b
14883 changed files with 3937406 additions and 361781 deletions

83
vendor/k8s.io/kubernetes/test/e2e/scheduling/BUILD generated vendored Normal file
View file

@ -0,0 +1,83 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"equivalence_cache_predicates.go",
"events.go",
"framework.go",
"nvidia-gpus.go",
"opaque_resource.go",
"predicates.go",
"preemption.go",
"priorities.go",
"rescheduler.go",
],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/v1/helper:go_default_library",
"//pkg/api/v1/pod:go_default_library",
"//pkg/util/system:go_default_library",
"//pkg/util/version:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/utils:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/api/scheduling/v1alpha1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/yaml:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_test(
name = "go_default_test",
srcs = ["taints_test.go"],
library = ":go_default_library",
tags = ["automanaged"],
deps = [
"//test/e2e/framework:go_default_library",
"//test/utils:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/watch:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
],
)

4
vendor/k8s.io/kubernetes/test/e2e/scheduling/OWNERS generated vendored Normal file
View file

@ -0,0 +1,4 @@
approvers:
- sig-scheduling-maintainers
reviewers:
- sig-scheduling

View file

@ -0,0 +1,286 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
_ "github.com/stretchr/testify/assert"
)
// EquivalenceCache e2e suite. Each spec schedules replica pods that are
// equivalent to one another and checks that scheduler predicates (host ports,
// pod affinity, pod anti-affinity) still produce the expected mix of scheduled
// and rejected pods. The suite is tagged [Serial]; see the BeforeEach comment.
var _ = framework.KubeDescribe("EquivalenceCache [Serial]", func() {
// State shared by all specs; (re)populated in BeforeEach.
var cs clientset.Interface
var nodeList *v1.NodeList
var masterNodes sets.String
var systemPodsNo int
var ns string
f := framework.NewDefaultFramework("equivalence-cache")
ignoreLabels := framework.ImagePullerLabels
BeforeEach(func() {
cs = f.ClientSet
ns = f.Namespace.Name
framework.WaitForAllNodesHealthy(cs, time.Minute)
masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)
framework.ExpectNoError(framework.CheckTestingNSDeletedExcept(cs, ns))
// Every test case in this suite assumes that cluster add-on pods stay stable and
// cannot be run in parallel with any other test that touches Nodes or Pods.
// It is so because we need to have precise control on what's running in the cluster.
// NOTE(review): the pods counted here are listed from the test namespace (ns),
// while the wait below targets api.NamespaceSystem — confirm which namespace is intended.
systemPods, err := framework.GetPodsInNamespace(cs, ns, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
systemPodsNo = 0
// Count only pods that are not on a master node and are not being deleted.
for _, pod := range systemPods {
if !masterNodes.Has(pod.Spec.NodeName) && pod.DeletionTimestamp == nil {
systemPodsNo++
}
}
err = framework.WaitForPodsRunningReady(cs, api.NamespaceSystem, int32(systemPodsNo), int32(systemPodsNo), framework.PodReadyBeforeTimeout, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
for _, node := range nodeList.Items {
framework.Logf("\nLogging pods the kubelet thinks is on node %v before test", node.Name)
framework.PrintAllKubeletPods(cs, node.Name)
}
})
// This test verifies that GeneralPredicates works as expected:
// When a replica pod (with HostPorts) is scheduled to a node, it will invalidate GeneralPredicates cache on this node,
// so that subsequent replica pods with same host port claim will be rejected.
// We enforce all replica pods bind to the same node so there will always be conflicts.
It("validates GeneralPredicates is properly invalidated when a pod is scheduled [Slow]", func() {
By("Launching a RC with two replica pods with HostPorts")
nodeName := getNodeThatCanRunPodWithoutToleration(f)
rcName := "host-port"
// bind all replicas to same node
nodeSelector := map[string]string{"kubernetes.io/hostname": nodeName}
By("One pod should be scheduled, the other should be rejected")
// CreateNodeSelectorPods creates RC with host port 4321
WaitForSchedulerAfterAction(f, func() error {
err := CreateNodeSelectorPods(f, rcName, 2, nodeSelector, false)
return err
}, rcName, false)
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, ns, rcName)
// the first replica pod is scheduled, and the second pod will be rejected.
verifyResult(cs, 1, 1, ns)
})
// This test verifies that MatchInterPodAffinity works as expected.
// In equivalence cache, it does not handle inter pod affinity (anti-affinity) specially (unless node label changed),
// because current predicates algorithm will ensure newly scheduled pod does not break existing affinity in cluster.
It("validates pod affinity works properly when new replica pod is scheduled", func() {
// create a pod running with label {security: S1}, and choose this node
nodeName, _ := runAndKeepPodWithLabelAndGetNodeName(f)
By("Trying to apply a random label on the found node.")
// we need to use real failure domains, since the scheduler only knows them
k := "failure-domain.beta.kubernetes.io/zone"
v := "equivalence-e2e-test"
oldValue := framework.AddOrUpdateLabelOnNodeAndReturnOldValue(cs, nodeName, k, v)
framework.ExpectNodeHasLabel(cs, nodeName, k, v)
// restore the node label
defer framework.AddOrUpdateLabelOnNode(cs, nodeName, k, oldValue)
By("Trying to schedule RC with Pod Affinity should success.")
framework.WaitForStableCluster(cs, masterNodes)
affinityRCName := "with-pod-affinity-" + string(uuid.NewUUID())
replica := 2
labelsMap := map[string]string{
"name": affinityRCName,
}
// Required affinity to pods labeled security=S1 in this namespace, using the
// node label applied above as the topology key.
affinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: k,
Namespaces: []string{ns},
},
},
},
}
rc := getRCWithInterPodAffinity(affinityRCName, labelsMap, replica, affinity, framework.GetPauseImageName(f.ClientSet))
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, ns, affinityRCName)
// RC should be running successfully
// TODO: WaitForSchedulerAfterAction() can only be used to wait for a failure event,
// not for successful RC, since no specific pod name can be provided.
_, err := cs.CoreV1().ReplicationControllers(ns).Create(rc)
framework.ExpectNoError(err)
framework.ExpectNoError(framework.WaitForControlledPodsRunning(cs, ns, affinityRCName, api.Kind("ReplicationController")))
By("Remove node failure domain label")
framework.RemoveLabelOffNode(cs, nodeName, k)
By("Trying to schedule another equivalent Pod should fail due to node label has been removed.")
// use scale to create another equivalent pod and wait for failure event
WaitForSchedulerAfterAction(f, func() error {
err := framework.ScaleRC(f.ClientSet, f.InternalClientset, ns, affinityRCName, uint(replica+1), false)
return err
}, affinityRCName, false)
// and this new pod should be rejected since the node label has been removed
verifyReplicasResult(cs, replica, 1, ns, affinityRCName)
})
// This test verifies that MatchInterPodAffinity (anti-affinity) is respected as expected.
It("validates pod anti-affinity works properly when new replica pod is scheduled", func() {
By("Launching two pods on two distinct nodes to get two node names")
CreateHostPortPods(f, "host-port", 2, true)
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, ns, "host-port")
podList, err := cs.CoreV1().Pods(ns).List(metav1.ListOptions{})
framework.ExpectNoError(err)
Expect(len(podList.Items)).To(Equal(2))
nodeNames := []string{podList.Items[0].Spec.NodeName, podList.Items[1].Spec.NodeName}
Expect(nodeNames[0]).ToNot(Equal(nodeNames[1]))
By("Applying a random label to both nodes.")
k := "e2e.inter-pod-affinity.kubernetes.io/zone"
v := "equivalence-e2etest"
for _, nodeName := range nodeNames {
framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
framework.ExpectNodeHasLabel(cs, nodeName, k, v)
// Deferred inside the loop on purpose: the calls run when this It body
// returns, so both labels stay in place for the rest of the spec.
defer framework.RemoveLabelOffNode(cs, nodeName, k)
}
By("Trying to launch a pod with the service label on the selected nodes.")
// run a pod with label {"service": "S1"} and expect it to be running
runPausePod(f, pausePodConfig{
Name: "with-label-" + string(uuid.NewUUID()),
Labels: map[string]string{"service": "S1"},
NodeSelector: map[string]string{k: v}, // only launch on our two nodes
})
By("Trying to launch RC with podAntiAffinity on these two nodes should be rejected.")
labelRCName := "with-podantiaffinity-" + string(uuid.NewUUID())
replica := 2
labelsMap := map[string]string{
"name": labelRCName,
}
// Required anti-affinity to pods labeled service=S1 in this namespace, using
// the label shared by both nodes as the topology key.
affinity := &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: k,
Namespaces: []string{ns},
},
},
},
}
rc := getRCWithInterPodAffinityNodeSelector(labelRCName, labelsMap, replica, affinity,
framework.GetPauseImageName(f.ClientSet), map[string]string{k: v})
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, ns, labelRCName)
WaitForSchedulerAfterAction(f, func() error {
_, err := cs.CoreV1().ReplicationControllers(ns).Create(rc)
return err
}, labelRCName, false)
// both replicas should be rejected, since their podAntiAffinity term matches the pod labeled {"service": "S1"}
verifyReplicasResult(cs, 0, replica, ns, labelRCName)
})
})
// getRCWithInterPodAffinity builds a ReplicationController whose pod template
// carries the given affinity rules. It is a convenience wrapper around
// getRCWithInterPodAffinityNodeSelector that passes an empty, non-nil node
// selector.
func getRCWithInterPodAffinity(name string, labelsMap map[string]string, replica int, affinity *v1.Affinity, image string) *v1.ReplicationController {
	noNodeSelector := map[string]string{}
	return getRCWithInterPodAffinityNodeSelector(name, labelsMap, replica, affinity, image, noNodeSelector)
}
// getRCWithInterPodAffinityNodeSelector builds a ReplicationController with
// the given affinity rules and node selector.
//
// name is used both as the RC name and as the name of its single container.
// labelsMap serves as the RC selector and as the pod template labels. replica
// is converted to int32 for the Replicas field. The object is only constructed
// here; nothing is sent to the API server.
func getRCWithInterPodAffinityNodeSelector(name string, labelsMap map[string]string, replica int, affinity *v1.Affinity, image string, nodeSelector map[string]string) *v1.ReplicationController {
	replicaCount := int32(replica)

	podSpec := v1.PodSpec{
		Affinity: affinity,
		Containers: []v1.Container{
			{Name: name, Image: image},
		},
		DNSPolicy:    v1.DNSDefault,
		NodeSelector: nodeSelector,
	}
	podTemplate := &v1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{Labels: labelsMap},
		Spec:       podSpec,
	}

	return &v1.ReplicationController{
		ObjectMeta: metav1.ObjectMeta{Name: name},
		Spec: v1.ReplicationControllerSpec{
			Replicas: &replicaCount,
			Selector: labelsMap,
			Template: podTemplate,
		},
	}
}
// CreateNodeSelectorPods runs a replication controller named id in the
// framework's namespace. Its pods use the pause image, claim host port 4321
// (under the key "port1") and carry the given node selector.
//
// When expectRunning is true the error from framework.RunRC is returned to the
// caller. When it is false that error is discarded and nil is returned, which
// lets callers launch replicas that are expected to stay unscheduled.
func CreateNodeSelectorPods(f *framework.Framework, id string, replicas int, nodeSelector map[string]string, expectRunning bool) error {
	By(fmt.Sprintf("Running RC which reserves host port and defines node selector"))

	rcConfig := testutils.RCConfig{
		Client:         f.ClientSet,
		InternalClient: f.InternalClientset,
		Name:           id,
		Namespace:      f.Namespace.Name,
		Timeout:        defaultTimeout,
		Image:          framework.GetPauseImageName(f.ClientSet),
		Replicas:       replicas,
		HostPorts:      map[string]int{"port1": 4321},
		NodeSelector:   nodeSelector,
	}

	runErr := framework.RunRC(rcConfig)
	if !expectRunning {
		// The caller does not require the pods to come up, so a RunRC
		// failure is not reported.
		return nil
	}
	return runErr
}

41
vendor/k8s.io/kubernetes/test/e2e/scheduling/events.go generated vendored Normal file
View file

@ -0,0 +1,41 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
"strings"
"k8s.io/api/core/v1"
)
// scheduleSuccessEvent returns a predicate that reports whether an event is
// the successful-scheduling event for a pod. An event matches when its type is
// Normal, its reason is "Scheduled", its name starts with podName and its
// message contains "Successfully assigned <podName> to <nodeName>".
//
// With an empty nodeName the expected text ends right after "to ", so the
// predicate accepts an assignment to any node.
func scheduleSuccessEvent(podName, nodeName string) func(*v1.Event) bool {
	wantMessage := fmt.Sprintf("Successfully assigned %v to %v", podName, nodeName)
	return func(e *v1.Event) bool {
		if e.Type != v1.EventTypeNormal || e.Reason != "Scheduled" {
			return false
		}
		if !strings.HasPrefix(e.Name, podName) {
			return false
		}
		return strings.Contains(e.Message, wantMessage)
	}
}
// scheduleFailureEvent returns a predicate that reports whether an event is a
// scheduling-failure event for a pod. An event matches when its name starts
// with podName, its type is Warning and its reason is "FailedScheduling".
func scheduleFailureEvent(podName string) func(*v1.Event) bool {
	return func(e *v1.Event) bool {
		// Use the API constant rather than a string literal, mirroring the
		// v1.EventTypeNormal comparison in scheduleSuccessEvent.
		return strings.HasPrefix(e.Name, podName) &&
			e.Type == v1.EventTypeWarning &&
			e.Reason == "FailedScheduling"
	}
}

View file

@ -0,0 +1,23 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import "github.com/onsi/ginkgo"
// SIGDescribe registers a ginkgo container for the scheduling SIG. The given
// text is prefixed with "[sig-scheduling] " before being handed to
// ginkgo.Describe, whose result is returned unchanged.
func SIGDescribe(text string, body func()) bool {
	taggedText := "[sig-scheduling] " + text
	return ginkgo.Describe(taggedText, body)
}

View file

@ -0,0 +1,265 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"io/ioutil"
"net/http"
"strings"
"time"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
utilyaml "k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/test/e2e/framework"
imageutils "k8s.io/kubernetes/test/utils/image"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
const (
// testPodNamePrefix is prepended to a UUID to name each CUDA test pod.
testPodNamePrefix = "nvidia-gpu-"
// cosOSImage is the substring looked for in a node's reported OS image to
// decide whether the node runs Container-Optimized OS.
cosOSImage = "Container-Optimized OS from Google"
// Nvidia driver installation can take upwards of 5 minutes.
driverInstallTimeout = 10 * time.Minute
)
// podCreationFuncType is the signature of the functions that build a CUDA test pod.
type podCreationFuncType func() *v1.Pod
// Package-level test configuration. All three values are assigned by
// testNvidiaGPUsOnCOS according to the framework base name before being read.
var (
// gpuResourceName is the resource name requested by test pods and looked up
// in node capacity.
gpuResourceName v1.ResourceName
// dsYamlUrl is the URL of the DaemonSet manifest passed to dsFromManifest.
dsYamlUrl string
// podCreationFunc builds the test pod for the selected GPU flavor.
podCreationFunc podCreationFuncType
)
// makeCudaAdditionTestPod builds (without creating) a never-restarting pod
// that runs the CUDA vector-add e2e image with a limit of one gpuResourceName.
// The host directory /home/kubernetes/bin/nvidia/lib is mounted into the
// container at /usr/local/nvidia/lib64. The pod name is testPodNamePrefix
// followed by a fresh UUID.
func makeCudaAdditionTestPod() *v1.Pod {
	const librariesVolume = "nvidia-libraries"

	podName := testPodNamePrefix + string(uuid.NewUUID())

	cudaContainer := v1.Container{
		Name:  "vector-addition",
		Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
		Resources: v1.ResourceRequirements{
			Limits: v1.ResourceList{
				gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
			},
		},
		VolumeMounts: []v1.VolumeMount{
			{Name: librariesVolume, MountPath: "/usr/local/nvidia/lib64"},
		},
	}
	hostLibraries := v1.Volume{
		Name: librariesVolume,
		VolumeSource: v1.VolumeSource{
			HostPath: &v1.HostPathVolumeSource{Path: "/home/kubernetes/bin/nvidia/lib"},
		},
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: podName},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers:    []v1.Container{cudaContainer},
			Volumes:       []v1.Volume{hostLibraries},
		},
	}
}
// makeCudaAdditionDevicePluginTestPod builds (without creating) a
// never-restarting pod that runs the CUDA vector-add e2e image with a limit of
// one gpuResourceName. Unlike makeCudaAdditionTestPod it declares no volumes
// or volume mounts. The pod name is testPodNamePrefix followed by a fresh UUID.
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
	podName := testPodNamePrefix + string(uuid.NewUUID())

	cudaContainer := v1.Container{
		Name:  "vector-addition",
		Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
		Resources: v1.ResourceRequirements{
			Limits: v1.ResourceList{
				gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
			},
		},
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: podName},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers:    []v1.Container{cudaContainer},
		},
	}
}
// isClusterRunningCOS lists all nodes and reports whether every one of them
// advertises an OS image containing cosOSImage. A failure to list nodes is
// reported through framework.ExpectNoError. An empty node list yields true.
func isClusterRunningCOS(f *framework.Framework) bool {
	nodes, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err, "getting node list")

	for i := range nodes.Items {
		osImage := nodes.Items[i].Status.NodeInfo.OSImage
		if strings.Contains(osImage, cosOSImage) {
			continue
		}
		// One non-COS node is enough to rule the cluster out.
		return false
	}
	return true
}
// areGPUsAvailableOnAllSchedulableNodes lists all nodes and reports whether
// every schedulable node has a non-zero capacity for gpuResourceName. Nodes
// marked unschedulable are ignored. It returns false at the first schedulable
// node whose capacity entry is missing or zero. A failure to list nodes is
// reported through framework.ExpectNoError.
func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
	framework.Logf("Getting list of Nodes from API server")
	nodes, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err, "getting node list")

	for i := range nodes.Items {
		node := &nodes.Items[i]
		if node.Spec.Unschedulable {
			continue
		}
		framework.Logf("gpuResourceName %s", gpuResourceName)
		quantity, found := node.Status.Capacity[gpuResourceName]
		if found && quantity.Value() != 0 {
			continue
		}
		framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
		return false
	}

	framework.Logf("Nvidia GPUs exist on all schedulable nodes")
	return true
}
// getGPUsAvailable lists all nodes and returns the sum of their capacity for
// gpuResourceName. Nodes without that capacity entry contribute nothing;
// unschedulable nodes are not filtered out. A failure to list nodes is
// reported through framework.ExpectNoError.
func getGPUsAvailable(f *framework.Framework) int64 {
	nodes, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err, "getting node list")

	var total int64
	for _, node := range nodes.Items {
		quantity, found := node.Status.Capacity[gpuResourceName]
		if !found {
			continue
		}
		total += quantity.Value()
	}
	return total
}
// testNvidiaGPUsOnCOS runs the shared GPU scenario:
//
//  1. Skips the spec unless every node runs Container-Optimized OS.
//  2. Sets the package-level dsYamlUrl, gpuResourceName and podCreationFunc
//     according to f.BaseName ("device-plugin-gpus" selects the device-plugin
//     flavor, anything else the legacy flavor).
//  3. If GPUs are not already advertised on all schedulable nodes, creates the
//     DaemonSet from dsYamlUrl in the test namespace and waits up to
//     driverInstallTimeout for them to appear.
//  4. Creates one test pod per advertised GPU and waits up to five minutes for
//     each pod to succeed.
func testNvidiaGPUsOnCOS(f *framework.Framework) {
	// Skip the test if the base image is not COS.
	// TODO: Add support for other base images.
	// CUDA apps require host mounts which is not portable across base images (yet).
	framework.Logf("Checking base image")
	if !isClusterRunningCOS(f) {
		Skip("Nvidia GPU tests are supported only on Container Optimized OS image currently")
	}
	framework.Logf("Cluster is running on COS. Proceeding with test")

	if f.BaseName == "device-plugin-gpus" {
		dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/device-plugin-daemonset.yaml"
		gpuResourceName = "nvidia.com/gpu"
		podCreationFunc = makeCudaAdditionDevicePluginTestPod
	} else {
		dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml"
		gpuResourceName = v1.ResourceNvidiaGPU
		podCreationFunc = makeCudaAdditionTestPod
	}

	// GPU drivers might have already been installed.
	if !areGPUsAvailableOnAllSchedulableNodes(f) {
		// Install Nvidia Drivers.
		ds := dsFromManifest(dsYamlUrl)
		ds.Namespace = f.Namespace.Name
		_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
		framework.ExpectNoError(err, "failed to create daemonset")
		framework.Logf("Successfully created daemonset to install Nvidia drivers. Waiting for drivers to be installed and GPUs to be available in Node Capacity...")
		// Wait for Nvidia GPUs to be available on nodes
		Eventually(func() bool {
			return areGPUsAvailableOnAllSchedulableNodes(f)
		}, driverInstallTimeout, time.Second).Should(BeTrue())
	}

	framework.Logf("Creating as many pods as there are Nvidia GPUs and have the pods run a CUDA app")
	// Query the GPU count once: getGPUsAvailable lists every node from the
	// API server, so evaluating it in the loop condition would repeat that
	// request for every pod created.
	gpuCount := getGPUsAvailable(f)
	podList := make([]*v1.Pod, 0, gpuCount)
	for i := int64(0); i < gpuCount; i++ {
		podList = append(podList, f.PodClient().Create(podCreationFunc()))
	}

	framework.Logf("Wait for all test pods to succeed")
	// Wait for all pods to succeed
	for _, po := range podList {
		f.PodClient().WaitForSuccess(po.Name, 5*time.Minute)
	}
}
// dsFromManifest downloads a .json/yaml manifest from url and returns the
// DaemonSet decoded from it.
//
// The download is attempted up to five times, sleeping i seconds after the
// i-th failed attempt. An attempt fails when http.Get returns an error or the
// response status is not 200 OK. Download, read, YAML-to-JSON conversion and
// decode failures are all reported through gomega assertions rather than
// returned.
func dsFromManifest(url string) *extensions.DaemonSet {
	const maxAttempts = 5

	var controller extensions.DaemonSet
	framework.Logf("Parsing ds from %v", url)

	var response *http.Response
	var err error
	for i := 1; i <= maxAttempts; i++ {
		response, err = http.Get(url)
		if err == nil && response.StatusCode == http.StatusOK {
			break
		}
		if err == nil {
			// A non-200 response still carries a body; close it so that a
			// retried or finally rejected attempt does not leak the
			// connection. StatusCode stays readable after Close.
			response.Body.Close()
		}
		time.Sleep(time.Duration(i) * time.Second)
	}
	Expect(err).NotTo(HaveOccurred())
	Expect(response.StatusCode).To(Equal(http.StatusOK))
	defer response.Body.Close()

	data, err := ioutil.ReadAll(response.Body)
	Expect(err).NotTo(HaveOccurred())

	jsonData, err := utilyaml.ToJSON(data)
	Expect(err).NotTo(HaveOccurred())

	Expect(runtime.DecodeInto(api.Codecs.UniversalDecoder(), jsonData, &controller)).NotTo(HaveOccurred())
	return &controller
}
// Registers the [Feature:GPU] spec, which runs the shared GPU scenario once.
var _ = SIGDescribe("[Feature:GPU]", func() {
// The base name "gpus" differs from "device-plugin-gpus", so
// testNvidiaGPUsOnCOS takes its non-device-plugin branch for this spec.
f := framework.NewDefaultFramework("gpus")
It("run Nvidia GPU tests on Container Optimized OS only", func() {
testNvidiaGPUsOnCOS(f)
})
})
// Registers the [Feature:GPUDevicePlugin] spec: run GPU pods, delete the
// device plugin DaemonSet and wait for GPUs to stop being advertised, then run
// the GPU scenario again.
var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
// The base name "device-plugin-gpus" is what testNvidiaGPUsOnCOS checks to
// select the device-plugin manifest, resource name and pod builder.
f := framework.NewDefaultFramework("device-plugin-gpus")
It("run Nvidia GPU Device Plugin tests on Container Optimized OS only", func() {
// 1. Verifies GPU resource is successfully advertised on the nodes
// and we can run pods using GPUs.
By("Starting device plugin daemonset and running GPU pods")
testNvidiaGPUsOnCOS(f)
// 2. Verifies that when the device plugin DaemonSet is removed, resource capacity drops to zero.
By("Deleting device plugin daemonset")
// The manifest is fetched again only to learn the DaemonSet's name;
// dsYamlUrl was set by the testNvidiaGPUsOnCOS call above.
ds := dsFromManifest(dsYamlUrl)
falseVar := false
// NOTE(review): OrphanDependents=false presumably makes the deletion remove
// the DaemonSet's pods as well — confirm against the API documentation.
err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
framework.ExpectNoError(err, "failed to delete daemonset")
framework.Logf("Successfully deleted device plugin daemonset. Wait for resource to be removed.")
// Wait for Nvidia GPUs to be not available on nodes
// NOTE(review): this condition becomes true as soon as any one schedulable
// node stops advertising GPUs, not necessarily all of them.
Eventually(func() bool {
return !areGPUsAvailableOnAllSchedulableNodes(f)
}, 5*time.Minute, time.Second).Should(BeTrue())
// 3. Restarts the device plugin DaemonSet. Verifies GPU resource is successfully advertised
// on the nodes and we can run pods using GPUs.
By("Restarting device plugin daemonset and running GPU pods")
testNvidiaGPUsOnCOS(f)
})
})

View file

@ -0,0 +1,305 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
"strings"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
"k8s.io/kubernetes/pkg/util/system"
"k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
// Opaque integer resource (OIR) e2e suite. Before every spec the opaque
// resource "foo" is advertised on one non-master node via addOpaqueResource
// (which patches a capacity of 5); each spec then creates pods and observes
// scheduler events to check whether they were scheduled or rejected.
var _ = SIGDescribe("Opaque resources [Feature:OpaqueResources]", func() {
f := framework.NewDefaultFramework("opaque-resource")
opaqueResName := v1helper.OpaqueIntResourceName("foo")
// node is selected once by the first BeforeEach run and reused by later specs.
var node *v1.Node
BeforeEach(func() {
if node == nil {
// Priming invocation; select the first non-master node.
nodes, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
Expect(err).NotTo(HaveOccurred())
for _, n := range nodes.Items {
if !system.IsMasterNode(n.Name) {
// node points at the range variable's copy of the item, not at
// nodes.Items; the immediate break means no later iteration
// overwrites that copy.
node = &n
break
}
}
if node == nil {
framework.Failf("unable to select a non-master node")
}
}
addOpaqueResource(f, node.Name, opaqueResName)
})
// TODO: The suite times out if removeOpaqueResource is called as part of
// an AfterEach closure. For now, it is the last statement in each
// It block.
// AfterEach(func() {
// removeOpaqueResource(f, node.Name, opaqueResName)
// })
It("should not break pods that do not consume opaque integer resources.", func() {
defer removeOpaqueResource(f, node.Name, opaqueResName)
By("Creating a vanilla pod")
requests := v1.ResourceList{v1.ResourceCPU: resource.MustParse("0.1")}
limits := v1.ResourceList{v1.ResourceCPU: resource.MustParse("0.2")}
pod := f.NewTestPod("without-oir", requests, limits)
By("Observing an event that indicates the pod was scheduled")
action := func() error {
_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod)
return err
}
// Here we don't check for the bound node name since it can land on
// any one (this pod doesn't require any of the opaque resource.)
predicate := scheduleSuccessEvent(pod.Name, "")
success, err := common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
})
It("should schedule pods that do consume opaque integer resources.", func() {
defer removeOpaqueResource(f, node.Name, opaqueResName)
By("Creating a pod that requires less of the opaque resource than is allocatable on a node.")
requests := v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0.1"),
opaqueResName: resource.MustParse("1"),
}
limits := v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0.2"),
opaqueResName: resource.MustParse("2"),
}
pod := f.NewTestPod("min-oir", requests, limits)
By("Observing an event that indicates the pod was scheduled")
action := func() error {
_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod)
return err
}
// The pod needs the opaque resource, so it is expected on the one node
// that advertises it.
predicate := scheduleSuccessEvent(pod.Name, node.Name)
success, err := common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
})
It("should not schedule pods that exceed the available amount of opaque integer resource.", func() {
defer removeOpaqueResource(f, node.Name, opaqueResName)
By("Creating a pod that requires more of the opaque resource than is allocatable on any node")
// A request of 6 exceeds the capacity of 5 that addOpaqueResource advertises.
requests := v1.ResourceList{opaqueResName: resource.MustParse("6")}
limits := v1.ResourceList{}
By("Observing an event that indicates the pod was not scheduled")
action := func() error {
_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(f.NewTestPod("over-max-oir", requests, limits))
return err
}
predicate := scheduleFailureEvent("over-max-oir")
success, err := common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
})
It("should account opaque integer resources in pods with multiple containers.", func() {
defer removeOpaqueResource(f, node.Name, opaqueResName)
By("Creating a pod with two containers that together require less of the opaque resource than is allocatable on a node")
requests := v1.ResourceList{opaqueResName: resource.MustParse("1")}
limits := v1.ResourceList{}
image := framework.GetPauseImageName(f.ClientSet)
// This pod consumes 2 "foo" resources.
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "mult-container-oir",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "pause",
Image: image,
Resources: v1.ResourceRequirements{
Requests: requests,
Limits: limits,
},
},
{
Name: "pause-sidecar",
Image: image,
Resources: v1.ResourceRequirements{
Requests: requests,
Limits: limits,
},
},
},
},
}
By("Observing an event that indicates the pod was scheduled")
action := func() error {
_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod)
return err
}
predicate := scheduleSuccessEvent(pod.Name, node.Name)
success, err := common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
By("Creating a pod with two containers that together require more of the opaque resource than is allocatable on any node")
requests = v1.ResourceList{opaqueResName: resource.MustParse("3")}
limits = v1.ResourceList{}
// This pod consumes 6 "foo" resources.
pod = &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "mult-container-over-max-oir",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "pause",
Image: image,
Resources: v1.ResourceRequirements{
Requests: requests,
Limits: limits,
},
},
{
Name: "pause-sidecar",
Image: image,
Resources: v1.ResourceRequirements{
Requests: requests,
Limits: limits,
},
},
},
},
}
By("Observing an event that indicates the pod was not scheduled")
action = func() error {
// Unlike the other actions, this one assigns the err variable declared
// in the enclosing It body instead of a closure-local one.
_, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod)
return err
}
predicate = scheduleFailureEvent(pod.Name)
success, err = common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
})
It("should schedule pods that initially do not fit after enough opaque integer resources are freed.", func() {
defer removeOpaqueResource(f, node.Name, opaqueResName)
By("Creating a pod that requires less of the opaque resource than is allocatable on a node.")
// Each pod requests 3 of the opaque resource, so the two together (6) do
// not fit into the advertised capacity of 5.
requests := v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0.1"),
opaqueResName: resource.MustParse("3"),
}
limits := v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0.2"),
opaqueResName: resource.MustParse("3"),
}
pod1 := f.NewTestPod("oir-1", requests, limits)
pod2 := f.NewTestPod("oir-2", requests, limits)
By("Observing an event that indicates one pod was scheduled")
action := func() error {
_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod1)
return err
}
predicate := scheduleSuccessEvent(pod1.Name, node.Name)
success, err := common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
By("Observing an event that indicates a subsequent pod was not scheduled")
action = func() error {
_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod2)
return err
}
predicate = scheduleFailureEvent(pod2.Name)
success, err = common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
By("Observing an event that indicates the second pod was scheduled after deleting the first pod")
// The action here is the deletion of pod1; the event waited for belongs to pod2.
action = func() error {
err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Delete(pod1.Name, nil)
return err
}
predicate = scheduleSuccessEvent(pod2.Name, node.Name)
success, err = common.ObserveEventAfterAction(f, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
})
})
// addOpaqueResource advertises the opaque resource on the named node and waits
// for the change to become visible.
//
// It sends a JSON patch that adds a capacity entry of "5" for opaqueResName to
// the node's status subresource, then observes node updates until both the
// capacity and the allocatable quantity for the resource are present and equal
// to 5 (5000 milli-units). Failures are reported through gomega assertions.
func addOpaqueResource(f *framework.Framework, nodeName string, opaqueResName v1.ResourceName) {
	const wantMilli = int64(5000)

	patchNode := func() error {
		By(fmt.Sprintf("Adding OIR to node [%s]", nodeName))
		patchBody := fmt.Sprintf(`[{"op": "add", "path": "/status/capacity/%s", "value": "5"}]`, escapeForJSONPatch(opaqueResName))
		request := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).
			Resource("nodes").
			Name(nodeName).
			SubResource("status").
			Body([]byte(patchBody))
		return request.Do().Error()
	}

	resourceAdvertised := func(n *v1.Node) bool {
		capacity, foundCap := n.Status.Capacity[opaqueResName]
		allocatable, foundAlloc := n.Status.Allocatable[opaqueResName]
		By(fmt.Sprintf("Node [%s] has OIR capacity: [%t] (%s), has OIR allocatable: [%t] (%s)", n.Name, foundCap, capacity.String(), foundAlloc, allocatable.String()))
		if !foundCap || !foundAlloc {
			return false
		}
		return capacity.MilliValue() == wantMilli && allocatable.MilliValue() == wantMilli
	}

	success, err := common.ObserveNodeUpdateAfterAction(f, nodeName, resourceAdvertised, patchNode)
	Expect(err).NotTo(HaveOccurred())
	Expect(success).To(Equal(true))
}
// removeOpaqueResource patches the status of the named node to drop
// opaqueResName from its capacity, then waits until a node update shows the
// resource absent (or zero) in both capacity and allocatable.
func removeOpaqueResource(f *framework.Framework, nodeName string, opaqueResName v1.ResourceName) {
	// applyPatch issues the JSON "remove" patch. The result is deliberately
	// discarded: the opaque resource may not exist on the node.
	applyPatch := func() error {
		By(fmt.Sprintf("Removing OIR from node [%s]", nodeName))
		body := fmt.Sprintf(`[{"op": "remove", "path": "/status/capacity/%s"}]`, escapeForJSONPatch(opaqueResName))
		f.ClientSet.CoreV1().RESTClient().
			Patch(types.JSONPatchType).
			Resource("nodes").
			Name(nodeName).
			SubResource("status").
			Body([]byte(body)).
			Do()
		return nil // Ignore error -- the opaque resource may not exist.
	}

	// resourceGone reports whether the observed node no longer advertises
	// a non-zero amount of the resource.
	resourceGone := func(n *v1.Node) bool {
		capQty, hasCap := n.Status.Capacity[opaqueResName]
		allocQty, hasAlloc := n.Status.Allocatable[opaqueResName]
		By(fmt.Sprintf("Node [%s] has OIR capacity: [%t] (%s), has OIR allocatable: [%t] (%s)", n.Name, hasCap, capQty.String(), hasAlloc, allocQty.String()))
		if hasCap && !capQty.IsZero() {
			return false
		}
		return !hasAlloc || allocQty.IsZero()
	}

	observed, err := common.ObserveNodeUpdateAfterAction(f, nodeName, resourceGone, applyPatch)
	Expect(err).NotTo(HaveOccurred())
	Expect(observed).To(Equal(true))
}
// escapeForJSONPatch returns resName encoded for use as a single reference
// token inside a JSON Pointer path (as used by JSON Patch operations).
//
// Per the JSON Pointer spec, "~" must be written as "~0" and "/" as "~1".
// See https://tools.ietf.org/html/rfc6901#section-3
//
// Names that contain no "~" are encoded exactly as before; only the "/"
// characters are rewritten.
func escapeForJSONPatch(resName v1.ResourceName) string {
	// A Replacer scans the input once and never re-scans its own output, so
	// the "~" introduced by "~1" is not escaped a second time.
	return strings.NewReplacer("~", "~0", "/", "~1").Replace(string(resName))
}

View file

@ -0,0 +1,734 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
clientset "k8s.io/client-go/kubernetes"
utilversion "k8s.io/kubernetes/pkg/util/version"
"k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
_ "github.com/stretchr/testify/assert"
)
const (
	// maxNumberOfPods is the divisor applied to the largest node allocatable
	// amount to derive the per-pod resource size in the saturation tests.
	maxNumberOfPods int64 = 10

	// minPodCPURequest is the lower bound (in milli-CPU) for the per-pod CPU
	// size used by the CPU saturation test.
	minPodCPURequest int64 = 500
)

var (
	// localStorageVersion is the server version the local ephemeral storage
	// test requires before it runs.
	localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")

	// masterNodes is set in BeforeEach and never modified afterwards.
	masterNodes sets.String
)
// pausePodConfig collects the settings that initPausePod copies into a
// single-container pause pod.
type pausePodConfig struct {
	// Name is used for both the pod and its only container.
	Name string
	// Affinity is copied to the pod spec as-is.
	Affinity *v1.Affinity
	// Annotations are copied to the pod metadata.
	Annotations map[string]string
	// Labels are copied to the pod metadata.
	Labels map[string]string
	// NodeSelector is copied to the pod spec.
	NodeSelector map[string]string
	// Resources, when non-nil, become the container's resource requirements.
	Resources *v1.ResourceRequirements
	// Tolerations are copied to the pod spec.
	Tolerations []v1.Toleration
	// NodeName is copied to the pod spec.
	NodeName string
	// Ports are assigned to the container.
	Ports []v1.ContainerPort
	// OwnerReferences are copied to the pod metadata.
	OwnerReferences []metav1.OwnerReference
	// PriorityClassName is copied to the pod spec.
	PriorityClassName string
}
var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
var cs clientset.Interface
var nodeList *v1.NodeList
var systemPodsNo int
var totalPodCapacity int64
var RCName string
var ns string
f := framework.NewDefaultFramework("sched-pred")
ignoreLabels := framework.ImagePullerLabels
// After each spec, delete the replication controller named RCName (and its
// pods) if it can be fetched and still has a non-zero replica count.
AfterEach(func() {
	rc, getErr := cs.CoreV1().ReplicationControllers(ns).Get(RCName, metav1.GetOptions{})
	if getErr != nil || *rc.Spec.Replicas == 0 {
		// Nothing to clean up: the RC could not be fetched or is already scaled to zero.
		return
	}
	By("Cleaning up the replication controller")
	framework.ExpectNoError(framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, ns, RCName))
})
// Per-spec setup: capture the client and namespace, wait for all nodes to be
// healthy, record the master-node set and the node list, and wait for the
// counted pods in kube-system to be running and ready before any scheduling
// assertions are made.
BeforeEach(func() {
cs = f.ClientSet
ns = f.Namespace.Name
// Placeholder value; it is overwritten by GetMasterAndWorkerNodesOrDie below.
nodeList = &v1.NodeList{}
framework.WaitForAllNodesHealthy(cs, time.Minute)
masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)
err := framework.CheckTestingNSDeletedExcept(cs, ns)
framework.ExpectNoError(err)
// Every test case in this suite assumes that cluster add-on pods stay stable and
// cannot be run in parallel with any other test that touches Nodes or Pods.
// It is so because we need to have precise control on what's running in the cluster.
// NOTE(review): the pods are listed from ns (the test namespace), while the
// resulting count is used below to wait on metav1.NamespaceSystem -- confirm
// that this is intended.
systemPods, err := framework.GetPodsInNamespace(cs, ns, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
// Count only pods that are not on a master node and are not being deleted.
systemPodsNo = 0
for _, pod := range systemPods {
if !masterNodes.Has(pod.Spec.NodeName) && pod.DeletionTimestamp == nil {
systemPodsNo++
}
}
err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
err = framework.WaitForPodsSuccess(cs, metav1.NamespaceSystem, framework.ImagePullerLabels, framework.ImagePrePullingTimeout)
Expect(err).NotTo(HaveOccurred())
// Log what each kubelet reports as running, to help diagnose failures.
for _, node := range nodeList.Items {
framework.Logf("\nLogging pods the kubelet thinks is on node %v before test", node.Name)
framework.PrintAllKubeletPods(cs, node.Name)
}
})
// This test verifies that the max-pods flag works as advertised. It assumes that cluster add-on pods stay stable
// and that it is not run in parallel with any other test that touches Nodes or Pods. This is required because, to
// check that max-pods is working, we need to fully saturate the cluster and keep it in this state for a few seconds.
//
// Slow PR #13315 (8 min)
It("validates MaxPods limit number of pods that are allowed to run [Slow]", func() {
// Sum the pod capacity reported by every node in nodeList.
totalPodCapacity = 0
for _, node := range nodeList.Items {
framework.Logf("Node: %v", node)
podCapacity, found := node.Status.Capacity[v1.ResourcePods]
Expect(found).To(Equal(true))
totalPodCapacity += podCapacity.Value()
}
currentlyScheduledPods := framework.WaitForStableCluster(cs, masterNodes)
podsNeededForSaturation := int(totalPodCapacity) - currentlyScheduledPods
By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster max pods and trying to start another one", podsNeededForSaturation))
// As the pods are distributed randomly among nodes,
// it can easily happen that all nodes are saturated
// and there is no need to create additional pods.
// StartPods requires at least one pod to replicate.
if podsNeededForSaturation > 0 {
framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "maxp",
*initPausePod(f, pausePodConfig{
Name: "",
Labels: map[string]string{"name": ""},
}), true, framework.Logf))
}
// One more pod than the cluster can hold: a scheduling failure event is
// expected for it (expectSuccess is false).
podName := "additional-pod"
WaitForSchedulerAfterAction(f, createPausePodAction(f, pausePodConfig{
Name: podName,
Labels: map[string]string{"name": "additional"},
}), podName, false)
// All saturation pods scheduled, exactly one pod (the extra one) unscheduled.
verifyResult(cs, podsNeededForSaturation, 1, ns)
})
// This test verifies that pods are not scheduled when the sum of the local
// ephemeral storage requested by pods would exceed what the nodes can allocate.
// It assumes that cluster add-on pods stay stable and that it is not run in
// parallel with any other test that touches Nodes or Pods, because it needs
// precise control over what is running in the cluster.
It("validates local ephemeral storage resource limits of pods that are allowed to run [Feature:LocalStorageCapacityIsolation]", func() {
	framework.SkipUnlessServerVersionGTE(localStorageVersion, f.ClientSet.Discovery())

	// Record each node's allocatable ephemeral storage (in milli-units) and
	// track the largest value seen.
	nodeMaxAllocatable := int64(0)
	nodeToAllocatableMap := make(map[string]int64)
	for _, node := range nodeList.Items {
		allocatable, found := node.Status.Allocatable[v1.ResourceEphemeralStorage]
		Expect(found).To(Equal(true))
		nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
		if nodeMaxAllocatable < allocatable.MilliValue() {
			nodeMaxAllocatable = allocatable.MilliValue()
		}
	}
	framework.WaitForStableCluster(cs, masterNodes)

	// Subtract what non-terminated pods on the tracked nodes already request.
	pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
	framework.ExpectNoError(err)
	for _, pod := range pods.Items {
		_, found := nodeToAllocatableMap[pod.Spec.NodeName]
		if found && pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
			framework.Logf("Pod %v requesting local ephemeral resource =%vm on Node %v", pod.Name, getRequestedStorageEphemeralStorage(pod), pod.Spec.NodeName)
			nodeToAllocatableMap[pod.Spec.NodeName] -= getRequestedStorageEphemeralStorage(pod)
		}
	}

	var podsNeededForSaturation int

	milliEphemeralStoragePerPod := nodeMaxAllocatable / maxNumberOfPods
	// The per-pod size is used as a divisor below; fail with a clear message
	// instead of panicking with an integer divide-by-zero when no node
	// reports a usable amount of allocatable ephemeral storage.
	Expect(milliEphemeralStoragePerPod).To(BeNumerically(">", 0),
		"largest node allocatable local ephemeral storage (%vm) is too small to size test pods", nodeMaxAllocatable)

	framework.Logf("Using pod capacity: %vm", milliEphemeralStoragePerPod)
	for name, leftAllocatable := range nodeToAllocatableMap {
		framework.Logf("Node: %v has local ephemeral resource allocatable: %vm", name, leftAllocatable)
		podsNeededForSaturation += (int)(leftAllocatable / milliEphemeralStoragePerPod)
	}

	By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster local ephemeral resource and trying to start another one", podsNeededForSaturation))

	// As the pods are distributed randomly among nodes,
	// it can easily happen that all nodes are saturated
	// and there is no need to create additional pods.
	// StartPods requires at least one pod to replicate.
	if podsNeededForSaturation > 0 {
		framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
			*initPausePod(f, pausePodConfig{
				Name:   "",
				Labels: map[string]string{"name": ""},
				Resources: &v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(milliEphemeralStoragePerPod, resource.DecimalSI),
					},
					Requests: v1.ResourceList{
						v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(milliEphemeralStoragePerPod, resource.DecimalSI),
					},
				},
			}), true, framework.Logf))
	}

	// One more pod of the same size: a scheduling failure event is expected
	// for it (expectSuccess is false).
	podName := "additional-pod"
	conf := pausePodConfig{
		Name:   podName,
		Labels: map[string]string{"name": "additional"},
		Resources: &v1.ResourceRequirements{
			Limits: v1.ResourceList{
				v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(milliEphemeralStoragePerPod, resource.DecimalSI),
			},
		},
	}
	WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
	// All saturation pods scheduled, exactly one pod (the extra one) unscheduled.
	verifyResult(cs, podsNeededForSaturation, 1, ns)
})
// This test verifies that pods are not scheduled when the sum of the CPU
// requested by pods would exceed what the nodes can allocate.
// It assumes that cluster add-on pods stay stable and that it is not run in
// parallel with any other test that touches Nodes or Pods, because it needs
// precise control over what is running in the cluster.
It("validates resource limits of pods that are allowed to run [Conformance]", func() {
	// Record each node's allocatable CPU (in milli-CPU) and track the
	// largest value seen.
	nodeMaxAllocatable := int64(0)
	nodeToAllocatableMap := make(map[string]int64)
	for _, node := range nodeList.Items {
		allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
		Expect(found).To(Equal(true))
		nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
		if nodeMaxAllocatable < allocatable.MilliValue() {
			nodeMaxAllocatable = allocatable.MilliValue()
		}
	}
	framework.WaitForStableCluster(cs, masterNodes)

	// Subtract what non-terminated pods on the tracked nodes already request.
	pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
	framework.ExpectNoError(err)
	for _, pod := range pods.Items {
		_, found := nodeToAllocatableMap[pod.Spec.NodeName]
		if found && pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
			framework.Logf("Pod %v requesting resource cpu=%vm on Node %v", pod.Name, getRequestedCPU(pod), pod.Spec.NodeName)
			nodeToAllocatableMap[pod.Spec.NodeName] -= getRequestedCPU(pod)
		}
	}

	var podsNeededForSaturation int

	// Size each pod at a tenth of the largest node, but never below the
	// minimum request; the floor also keeps the divisor below non-zero.
	milliCPUPerPod := nodeMaxAllocatable / maxNumberOfPods
	if milliCPUPerPod < minPodCPURequest {
		milliCPUPerPod = minPodCPURequest
	}
	framework.Logf("Using pod capacity: %vm", milliCPUPerPod)
	for name, leftAllocatable := range nodeToAllocatableMap {
		framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
		podsNeededForSaturation += (int)(leftAllocatable / milliCPUPerPod)
	}

	By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))

	// As the pods are distributed randomly among nodes,
	// it can easily happen that all nodes are saturated
	// and there is no need to create additional pods.
	// StartPods requires at least one pod to replicate.
	if podsNeededForSaturation > 0 {
		framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
			*initPausePod(f, pausePodConfig{
				Name:   "",
				Labels: map[string]string{"name": ""},
				Resources: &v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU: *resource.NewMilliQuantity(milliCPUPerPod, resource.DecimalSI),
					},
					Requests: v1.ResourceList{
						v1.ResourceCPU: *resource.NewMilliQuantity(milliCPUPerPod, resource.DecimalSI),
					},
				},
			}), true, framework.Logf))
	}

	// One more pod of the same size: a scheduling failure event is expected
	// for it (expectSuccess is false).
	podName := "additional-pod"
	conf := pausePodConfig{
		Name:   podName,
		Labels: map[string]string{"name": "additional"},
		Resources: &v1.ResourceRequirements{
			Limits: v1.ResourceList{
				v1.ResourceCPU: *resource.NewMilliQuantity(milliCPUPerPod, resource.DecimalSI),
			},
		},
	}
	WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
	// All saturation pods scheduled, exactly one pod (the extra one) unscheduled.
	verifyResult(cs, podsNeededForSaturation, 1, ns)
})
// The test nodes are assumed not to carry the label requested below, hence it
// should be impossible to schedule a Pod with this non-empty NodeSelector.
It("validates that NodeSelector is respected if not matching [Conformance]", func() {
By("Trying to schedule Pod with nonempty NodeSelector.")
podName := "restricted-pod"
framework.WaitForStableCluster(cs, masterNodes)
conf := pausePodConfig{
Name: podName,
Labels: map[string]string{"name": "restricted"},
NodeSelector: map[string]string{
"label": "nonempty",
},
}
// A scheduling failure event is expected for the pod (expectSuccess is false).
WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
// No pod scheduled, exactly one pod unscheduled.
verifyResult(cs, 0, 1, ns)
})
// Creating a pod whose required node affinity holds a selector term with an
// empty MatchExpressions list must be rejected with an "invalid" API error.
It("validates that a pod with an invalid NodeAffinity is rejected", func() {
	By("Trying to launch a pod with an invalid Affinity data.")

	emptyTerm := v1.NodeSelectorTerm{
		MatchExpressions: []v1.NodeSelectorRequirement{},
	}
	invalidAffinity := &v1.Affinity{
		NodeAffinity: &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{emptyTerm},
			},
		},
	}
	candidate := initPausePod(f, pausePodConfig{
		Name:     "without-label",
		Affinity: invalidAffinity,
	})

	_, err := cs.CoreV1().Pods(ns).Create(candidate)
	if err != nil && errors.IsInvalid(err) {
		// Rejected for the expected reason.
		return
	}
	framework.Failf("Expect error of invalid, got : %v", err)
})
// Labels a node that is known to be able to run a pod with a random label,
// then checks that a pod selecting that label lands on exactly that node.
It("validates that NodeSelector is respected if matching [Conformance]", func() {
nodeName := GetNodeThatCanRunPod(f)
By("Trying to apply a random label on the found node.")
// The key embeds a fresh UUID so it does not collide with existing labels.
k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
v := "42"
framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
framework.ExpectNodeHasLabel(cs, nodeName, k, v)
// Remove the label again when the spec ends.
defer framework.RemoveLabelOffNode(cs, nodeName, k)
By("Trying to relaunch the pod, now with labels.")
labelPodName := "with-labels"
createPausePod(f, pausePodConfig{
Name: labelPodName,
NodeSelector: map[string]string{
k: v,
},
})
// check that pod got scheduled. We intentionally DO NOT check that the
// pod is running because this will create a race condition with the
// kubelet and the scheduler: the scheduler might have scheduled a pod
// already when the kubelet does not know about its new label yet. The
// kubelet will then refuse to launch the pod.
framework.ExpectNoError(framework.WaitForPodNotPending(cs, ns, labelPodName))
labelPod, err := cs.CoreV1().Pods(ns).Get(labelPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
Expect(labelPod.Spec.NodeName).To(Equal(nodeName))
})
// The test nodes are assumed not to carry the labels requested below, hence it
// should be impossible to schedule a Pod with this non-nil
// NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.
It("validates that NodeAffinity is respected if not matching", func() {
By("Trying to schedule Pod with nonempty NodeSelector.")
podName := "restricted-pod"
framework.WaitForStableCluster(cs, masterNodes)
// Two selector terms are supplied, keyed on "foo" and on "diffkey".
conf := pausePodConfig{
Name: podName,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar", "value2"},
},
},
}, {
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "diffkey",
Operator: v1.NodeSelectorOpIn,
Values: []string{"wrong", "value2"},
},
},
},
},
},
},
},
Labels: map[string]string{"name": "restricted"},
}
// A scheduling failure event is expected for the pod (expectSuccess is false).
WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
// No pod scheduled, exactly one pod unscheduled.
verifyResult(cs, 0, 1, ns)
})
// Follows the same steps as the matching NodeSelector test, but expresses the
// constraint through Pod.Spec.Affinity instead of NodeSelector.
It("validates that required NodeAffinity setting is respected if matching", func() {
nodeName := GetNodeThatCanRunPod(f)
By("Trying to apply a random label on the found node.")
// The key embeds a fresh UUID so it does not collide with existing labels.
k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
v := "42"
framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
framework.ExpectNodeHasLabel(cs, nodeName, k, v)
// Remove the label again when the spec ends.
defer framework.RemoveLabelOffNode(cs, nodeName, k)
By("Trying to relaunch the pod, now with labels.")
labelPodName := "with-labels"
// Require the label applied above via an "In" match expression.
createPausePod(f, pausePodConfig{
Name: labelPodName,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: k,
Operator: v1.NodeSelectorOpIn,
Values: []string{v},
},
},
},
},
},
},
},
})
// check that pod got scheduled. We intentionally DO NOT check that the
// pod is running because this will create a race condition with the
// kubelet and the scheduler: the scheduler might have scheduled a pod
// already when the kubelet does not know about its new label yet. The
// kubelet will then refuse to launch the pod.
framework.ExpectNoError(framework.WaitForPodNotPending(cs, ns, labelPodName))
labelPod, err := cs.CoreV1().Pods(ns).Get(labelPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
Expect(labelPod.Spec.NodeName).To(Equal(nodeName))
})
// 1. Run a pod to get an available node, then delete the pod
// 2. Taint the node with a random taint
// 3. Label the node with a random label
// 4. Launch a pod that tolerates the taint and selects the node through the
//    random label, and check that it lands on the node found in step 1
It("validates that taints-tolerations is respected if matching", func() {
nodeName := getNodeThatCanRunPodWithoutToleration(f)
By("Trying to apply a random taint on the found node.")
testTaint := v1.Taint{
Key: fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
Value: "testing-taint-value",
Effect: v1.TaintEffectNoSchedule,
}
framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
// Remove the taint again when the spec ends.
defer framework.RemoveTaintOffNode(cs, nodeName, testTaint)
By("Trying to apply a random label on the found node.")
labelKey := fmt.Sprintf("kubernetes.io/e2e-label-key-%s", string(uuid.NewUUID()))
labelValue := "testing-label-value"
framework.AddOrUpdateLabelOnNode(cs, nodeName, labelKey, labelValue)
framework.ExpectNodeHasLabel(cs, nodeName, labelKey, labelValue)
// Remove the label again when the spec ends.
defer framework.RemoveLabelOffNode(cs, nodeName, labelKey)
By("Trying to relaunch the pod, now with tolerations.")
tolerationPodName := "with-tolerations"
// The toleration mirrors the taint's key, value and effect; the node
// selector pins the pod to the labelled node.
createPausePod(f, pausePodConfig{
Name: tolerationPodName,
Tolerations: []v1.Toleration{{Key: testTaint.Key, Value: testTaint.Value, Effect: testTaint.Effect}},
NodeSelector: map[string]string{labelKey: labelValue},
})
// check that pod got scheduled. We intentionally DO NOT check that the
// pod is running because this will create a race condition with the
// kubelet and the scheduler: the scheduler might have scheduled a pod
// already when the kubelet does not know about its new taint yet. The
// kubelet will then refuse to launch the pod.
framework.ExpectNoError(framework.WaitForPodNotPending(cs, ns, tolerationPodName))
deployedPod, err := cs.CoreV1().Pods(ns).Get(tolerationPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
Expect(deployedPod.Spec.NodeName).To(Equal(nodeName))
})
// 1. Run a pod to get an available node, then delete the pod
// 2. Taint the node with a random taint
// 3. Label the node with a random label
// 4. Launch a pod without tolerations that selects the node through the
//    random label, and check that it is not scheduled
// 5. Remove the taint and check that the pod is then scheduled
It("validates that taints-tolerations is respected if not matching", func() {
nodeName := getNodeThatCanRunPodWithoutToleration(f)
By("Trying to apply a random taint on the found node.")
testTaint := v1.Taint{
Key: fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
Value: "testing-taint-value",
Effect: v1.TaintEffectNoSchedule,
}
framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
// Remove the taint when the spec ends (it is also removed explicitly below).
defer framework.RemoveTaintOffNode(cs, nodeName, testTaint)
By("Trying to apply a random label on the found node.")
labelKey := fmt.Sprintf("kubernetes.io/e2e-label-key-%s", string(uuid.NewUUID()))
labelValue := "testing-label-value"
framework.AddOrUpdateLabelOnNode(cs, nodeName, labelKey, labelValue)
framework.ExpectNodeHasLabel(cs, nodeName, labelKey, labelValue)
// Remove the label again when the spec ends.
defer framework.RemoveLabelOffNode(cs, nodeName, labelKey)
By("Trying to relaunch the pod, still no tolerations.")
podNameNoTolerations := "still-no-tolerations"
conf := pausePodConfig{
Name: podNameNoTolerations,
NodeSelector: map[string]string{labelKey: labelValue},
}
// A scheduling failure event is expected while the taint is in place.
WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podNameNoTolerations, false)
verifyResult(cs, 0, 1, ns)
By("Removing taint off the node")
// With the taint removed, a scheduling success event is expected for the same pod.
WaitForSchedulerAfterAction(f, removeTaintFromNodeAction(cs, nodeName, testTaint), podNameNoTolerations, true)
verifyResult(cs, 1, 0, ns)
})
})
// initPausePod builds (but does not create) a single-container pod running the
// pause image, populated from conf. The container shares the pod's name and
// receives conf.Resources only when that field is non-nil.
func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
	pauseContainer := v1.Container{
		Name:  conf.Name,
		Image: framework.GetPauseImageName(f.ClientSet),
		Ports: conf.Ports,
	}
	if conf.Resources != nil {
		pauseContainer.Resources = *conf.Resources
	}

	meta := metav1.ObjectMeta{
		Name:            conf.Name,
		Labels:          conf.Labels,
		Annotations:     conf.Annotations,
		OwnerReferences: conf.OwnerReferences,
	}
	spec := v1.PodSpec{
		NodeSelector:      conf.NodeSelector,
		Affinity:          conf.Affinity,
		Containers:        []v1.Container{pauseContainer},
		Tolerations:       conf.Tolerations,
		NodeName:          conf.NodeName,
		PriorityClassName: conf.PriorityClassName,
	}
	return &v1.Pod{ObjectMeta: meta, Spec: spec}
}
// createPausePod creates the pause pod described by conf in the framework's
// namespace, fails the test if creation returns an error, and returns the
// created pod.
func createPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
	podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name)
	created, err := podClient.Create(initPausePod(f, conf))
	framework.ExpectNoError(err)
	return created
}
// runPausePod creates the pause pod described by conf, waits for it to be
// running, and returns a freshly fetched copy of it (looked up by conf.Name).
func runPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
	created := createPausePod(f, conf)
	framework.ExpectNoError(framework.WaitForPodRunningInNamespace(f.ClientSet, created))

	podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name)
	running, err := podClient.Get(conf.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)
	return running
}
// runPodAndGetNodeName runs a pause pod, deletes it again with a zero grace
// period, and returns the name of the node it was running on.
func runPodAndGetNodeName(f *framework.Framework, conf pausePodConfig) string {
	// launch a pod to find a node which can launch a pod. We intentionally do
	// not just take the node list and choose the first of them. Depending on the
	// cluster and the scheduler it might be that a "normal" pod cannot be
	// scheduled onto it.
	probe := runPausePod(f, conf)
	hostNode := probe.Spec.NodeName

	By("Explicitly delete pod here to free the resource it takes.")
	podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name)
	framework.ExpectNoError(podClient.Delete(probe.Name, metav1.NewDeleteOptions(0)))

	return hostNode
}
// getRequestedCPU returns the sum, in milli-CPU, of the CPU requests of all
// containers in the pod's spec.
func getRequestedCPU(pod v1.Pod) int64 {
	totalMilli := int64(0)
	containers := pod.Spec.Containers
	for i := range containers {
		totalMilli += containers[i].Resources.Requests.Cpu().MilliValue()
	}
	return totalMilli
}
// getRequestedStorageEphemeralStorage returns the sum, in milli-units, of the
// ephemeral storage requests of all containers in the pod's spec.
func getRequestedStorageEphemeralStorage(pod v1.Pod) int64 {
	totalMilli := int64(0)
	containers := pod.Spec.Containers
	for i := range containers {
		totalMilli += containers[i].Resources.Requests.StorageEphemeral().MilliValue()
	}
	return totalMilli
}
// removeTaintFromNodeAction builds an action that, when invoked, removes
// testTaint from the node named nodeName. The action always returns nil.
func removeTaintFromNodeAction(cs clientset.Interface, nodeName string, testTaint v1.Taint) common.Action {
	removeTaint := func() error {
		framework.RemoveTaintOffNode(cs, nodeName, testTaint)
		return nil
	}
	return removeTaint
}
// createPausePodAction builds an action that, when invoked, creates the pause
// pod described by conf and returns the creation error, if any.
func createPausePodAction(f *framework.Framework, conf pausePodConfig) common.Action {
	createPod := func() error {
		podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name)
		_, createErr := podClient.Create(initPausePod(f, conf))
		return createErr
	}
	return createPod
}
// WaitForSchedulerAfterAction runs action and then waits for a scheduler event
// about podName: a scheduling-success event (on any node) when expectSuccess
// is true, a scheduling-failure event otherwise. The test fails if observing
// returns an error or the event is not seen.
func WaitForSchedulerAfterAction(f *framework.Framework, action common.Action, podName string, expectSuccess bool) {
	eventMatches := scheduleFailureEvent(podName)
	if expectSuccess {
		eventMatches = scheduleSuccessEvent(podName, "" /* any node */)
	}

	observed, err := common.ObserveEventAfterAction(f, eventMatches, action)
	Expect(err).NotTo(HaveOccurred())
	Expect(observed).To(Equal(true))
}
// verifyResult lists every pod in namespace ns, splits them into scheduled and
// not-scheduled pods with framework.GetPodsScheduled, and asserts that the
// sizes of both groups match the expected counts. On a mismatch, the failure
// description contains a dump of the offending group.
//
// TODO: upgrade calls in PodAffinity tests when we're able to run them
func verifyResult(c clientset.Interface, expectedScheduled int, expectedNotScheduled int, ns string) {
	allPods, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
	framework.ExpectNoError(err)
	scheduledPods, notScheduledPods := framework.GetPodsScheduled(masterNodes, allPods)

	// The format string and its argument are handed to gomega, which formats
	// the description itself. This keeps the description attached to each
	// assertion and avoids re-using an already formatted dump as a format
	// string.
	Expect(len(notScheduledPods)).To(Equal(expectedNotScheduled), "Not scheduled Pods: %#v", notScheduledPods)
	Expect(len(scheduledPods)).To(Equal(expectedScheduled), "Scheduled Pods: %#v", scheduledPods)
}
// verifyReplicasResult performs the same check as verifyResult, restricted to
// the pods in ns that carry the label "name: labelName" (i.e. a group of pods
// that belong to the same RC). On a mismatch, the failure description contains
// a dump of the offending group.
func verifyReplicasResult(c clientset.Interface, expectedScheduled int, expectedNotScheduled int, ns string, labelName string) {
	allPods := getPodsByLabels(c, ns, map[string]string{"name": labelName})
	scheduledPods, notScheduledPods := framework.GetPodsScheduled(masterNodes, allPods)

	// The format string and its argument are handed to gomega, which formats
	// the description itself. This keeps the description attached to each
	// assertion and avoids re-using an already formatted dump as a format
	// string.
	Expect(len(notScheduledPods)).To(Equal(expectedNotScheduled), "Not scheduled Pods: %#v", notScheduledPods)
	Expect(len(scheduledPods)).To(Equal(expectedScheduled), "Scheduled Pods: %#v", scheduledPods)
}
// getPodsByLabels lists the pods in namespace ns whose labels match every
// key/value pair in labelsMap, failing the test if the list call errors.
func getPodsByLabels(c clientset.Interface, ns string, labelsMap map[string]string) *v1.PodList {
	listOpts := metav1.ListOptions{
		LabelSelector: labels.SelectorFromSet(labels.Set(labelsMap)).String(),
	}
	matching, err := c.CoreV1().Pods(ns).List(listOpts)
	framework.ExpectNoError(err)
	return matching
}
// runAndKeepPodWithLabelAndGetNodeName runs a pause pod labelled
// "security: S1" under a UUID-suffixed name, leaves it running, and returns
// the node it runs on followed by the pod's name.
func runAndKeepPodWithLabelAndGetNodeName(f *framework.Framework) (string, string) {
	// launch a pod to find a node which can launch a pod. We intentionally do
	// not just take the node list and choose the first of them. Depending on the
	// cluster and the scheduler it might be that a "normal" pod cannot be
	// scheduled onto it.
	By("Trying to launch a pod with a label to get a node which can launch it.")
	conf := pausePodConfig{
		Name:   "with-label-" + string(uuid.NewUUID()),
		Labels: map[string]string{"security": "S1"},
	}
	labelled := runPausePod(f, conf)
	return labelled.Spec.NodeName, labelled.Name
}
// GetNodeThatCanRunPod runs (and then deletes) a pause pod named
// "without-label" and returns the name of the node it was running on.
func GetNodeThatCanRunPod(f *framework.Framework) string {
	By("Trying to launch a pod without a label to get a node which can launch it.")
	probeConf := pausePodConfig{Name: "without-label"}
	return runPodAndGetNodeName(f, probeConf)
}
// getNodeThatCanRunPodWithoutToleration runs (and then deletes) a pause pod
// named "without-toleration" and returns the name of the node it was running on.
func getNodeThatCanRunPodWithoutToleration(f *framework.Framework) string {
	By("Trying to launch a pod without a toleration to get a node which can launch it.")
	probeConf := pausePodConfig{Name: "without-toleration"}
	return runPodAndGetNodeName(f, probeConf)
}
// CreateHostPortPods runs a replication controller named id with the given
// number of pause-image replicas, each reserving host port 4321.
//
// When expectRunning is true, the test fails if running the RC returns an
// error. When it is false, the error is intentionally ignored.
func CreateHostPortPods(f *framework.Framework, id string, replicas int, expectRunning bool) {
	By("Running RC which reserves host port")
	config := testutils.RCConfig{
		Client:         f.ClientSet,
		InternalClient: f.InternalClientset,
		Name:           id,
		Namespace:      f.Namespace.Name,
		Timeout:        defaultTimeout,
		Image:          framework.GetPauseImageName(f.ClientSet),
		Replicas:       replicas,
		HostPorts:      map[string]int{"port1": 4321},
	}
	err := framework.RunRC(config)
	if expectRunning {
		framework.ExpectNoError(err)
	}
}

View file

@ -0,0 +1,253 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
"time"
"k8s.io/api/core/v1"
"k8s.io/api/scheduling/v1alpha1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
_ "github.com/stretchr/testify/assert"
)
var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func() {
var cs clientset.Interface
var nodeList *v1.NodeList
var ns string
f := framework.NewDefaultFramework("sched-preemption")
lowPriority, mediumPriority, highPriority := int32(1), int32(100), int32(1000)
lowPriorityClassName := f.BaseName + "-low-priority"
mediumPriorityClassName := f.BaseName + "-medium-priority"
highPriorityClassName := f.BaseName + "-high-priority"
// No per-spec cleanup is performed here.
// NOTE(review): the PriorityClasses created in BeforeEach are not deleted by
// this hook -- confirm whether they are cleaned up elsewhere.
AfterEach(func() {
})
// Per-spec setup: capture the client and namespace, make sure the high,
// medium and low PriorityClasses exist (an "already exists" error is
// accepted), wait for healthy nodes, and record the master-node set and the
// node list.
BeforeEach(func() {
	cs = f.ClientSet
	ns = f.Namespace.Name
	nodeList = &v1.NodeList{}

	// Created in this order: high, medium, low.
	priorityClasses := []struct {
		name  string
		value int32
	}{
		{highPriorityClassName, highPriority},
		{mediumPriorityClassName, mediumPriority},
		{lowPriorityClassName, lowPriority},
	}
	for _, pc := range priorityClasses {
		_, createErr := f.ClientSet.SchedulingV1alpha1().PriorityClasses().Create(&v1alpha1.PriorityClass{
			ObjectMeta: metav1.ObjectMeta{Name: pc.name},
			Value:      pc.value,
		})
		Expect(createErr == nil || errors.IsAlreadyExists(createErr)).To(Equal(true))
	}

	framework.WaitForAllNodesHealthy(cs, time.Minute)
	masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)

	framework.ExpectNoError(framework.CheckTestingNSDeletedExcept(cs, ns))
})
// This test verifies that when a higher priority pod is created and no node with
// enough resources is found, the scheduler preempts a lower priority pod to schedule
// the high priority pod.
It("validates basic preemption works", func() {
// podRes is reassigned for every node; after the loop it holds the last
// node's values and is reused for the preemptor pod.
var podRes v1.ResourceList
// Create one pod per node that uses a lot of the node's resources.
By("Create pods that use 60% of node resources.")
pods := make([]*v1.Pod, len(nodeList.Items))
for i, node := range nodeList.Items {
cpuAllocatable, found := node.Status.Allocatable["cpu"]
Expect(found).To(Equal(true))
// NOTE(review): CPU is sized at 40% of allocatable while memory is sized
// at 60%; the step message above says 60% -- confirm this is intended.
milliCPU := cpuAllocatable.MilliValue() * 40 / 100
memAllocatable, found := node.Status.Allocatable["memory"]
Expect(found).To(Equal(true))
memory := memAllocatable.Value() * 60 / 100
podRes = v1.ResourceList{}
podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
// make the first pod low priority and the rest medium priority.
priorityName := mediumPriorityClassName
if i == 0 {
priorityName = lowPriorityClassName
}
pods[i] = createPausePod(f, pausePodConfig{
Name: fmt.Sprintf("pod%d-%v", i, priorityName),
PriorityClassName: priorityName,
Resources: &v1.ResourceRequirements{
Requests: podRes,
},
})
framework.Logf("Created pod: %v", pods[i].Name)
}
By("Wait for pods to be scheduled.")
for _, pod := range pods {
framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
}
By("Run a high priority pod that use 60% of a node resources.")
// Create a high priority pod and make sure it is scheduled.
runPausePod(f, pausePodConfig{
Name: "preemptor-pod",
PriorityClassName: highPriorityClassName,
Resources: &v1.ResourceRequirements{
Requests: podRes,
},
})
// Make sure that the lowest priority pod is deleted.
// The pod counts as deleted when it is no longer found or when it carries
// a deletion timestamp.
preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
podDeleted := (err != nil && errors.IsNotFound(err)) ||
(err == nil && preemptedPod.DeletionTimestamp != nil)
Expect(podDeleted).To(BeTrue())
// Other pods (mid priority ones) should be present.
for i := 1; i < len(pods); i++ {
livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
framework.ExpectNoError(err)
Expect(livePod.DeletionTimestamp).To(BeNil())
}
})
// This test verifies that when a high priority pod is pending and its
// scheduling violates a medium priority pod anti-affinity, the medium priority
// pod is preempted to allow the higher priority pod schedule.
// It also verifies that existing low priority pods are not preempted as their
// preemption wouldn't help.
It("validates pod anti-affinity works in preemption", func() {
var podRes v1.ResourceList
// Create a few pods that uses a small amount of resources.
By("Create pods that use 10% of node resources.")
numPods := 4
if len(nodeList.Items) < numPods {
numPods = len(nodeList.Items)
}
pods := make([]*v1.Pod, numPods)
for i := 0; i < numPods; i++ {
node := nodeList.Items[i]
cpuAllocatable, found := node.Status.Allocatable["cpu"]
Expect(found).To(BeTrue())
milliCPU := cpuAllocatable.MilliValue() * 10 / 100
memAllocatable, found := node.Status.Allocatable["memory"]
Expect(found).To(BeTrue())
memory := memAllocatable.Value() * 10 / 100
podRes = v1.ResourceList{}
podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
// Apply node label to each node
framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
// make the first pod medium priority and the rest low priority.
priorityName := lowPriorityClassName
if i == 0 {
priorityName = mediumPriorityClassName
}
pods[i] = createPausePod(f, pausePodConfig{
Name: fmt.Sprintf("pod%d-%v", i, priorityName),
PriorityClassName: priorityName,
Resources: &v1.ResourceRequirements{
Requests: podRes,
},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"blah", "foo"},
},
},
},
TopologyKey: "node",
},
},
},
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node",
Operator: v1.NodeSelectorOpIn,
Values: []string{node.Name},
},
},
},
},
},
},
},
})
framework.Logf("Created pod: %v", pods[i].Name)
}
defer func() { // Remove added labels
for i := 0; i < numPods; i++ {
framework.RemoveLabelOffNode(cs, nodeList.Items[i].Name, "node")
}
}()
By("Wait for pods to be scheduled.")
for _, pod := range pods {
framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
}
By("Run a high priority pod with node affinity to the first node.")
// Create a high priority pod and make sure it is scheduled.
runPausePod(f, pausePodConfig{
Name: "preemptor-pod",
PriorityClassName: highPriorityClassName,
Labels: map[string]string{"service": "blah"},
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node",
Operator: v1.NodeSelectorOpIn,
Values: []string{nodeList.Items[0].Name},
},
},
},
},
},
},
},
})
// Make sure that the medium priority pod on the first node is preempted.
preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
podDeleted := (err != nil && errors.IsNotFound(err)) ||
(err == nil && preemptedPod.DeletionTimestamp != nil)
Expect(podDeleted).To(BeTrue())
// Other pods (low priority ones) should be present.
for i := 1; i < len(pods); i++ {
livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
framework.ExpectNoError(err)
Expect(livePod.DeletionTimestamp).To(BeNil())
}
})
})

View file

@ -0,0 +1,403 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"encoding/json"
"fmt"
"math"
"time"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
_ "github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
clientset "k8s.io/client-go/kubernetes"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
)
// Resource is the pair of totals returned by getNonZeroRequests: the summed
// CPU and memory requests of all containers in a pod.
type Resource struct {
// MilliCPU is the summed CPU request; computeCpuMemFraction compares it
// against the node's allocatable CPU MilliValue().
MilliCPU int64
// Memory is the summed memory request; computeCpuMemFraction compares it
// against the node's allocatable memory Value().
Memory int64
}
var (
	// balancePodLabel is the label set put on the filler pods started by
	// createBalancedPodForNodes.
	balancePodLabel = map[string]string{"name": "priority-balanced-memory"}

	// podRequestedResource is the resource requirement shared by the pods
	// under test in this file: 100m CPU and 100Mi memory, with limits equal
	// to requests.
	podRequestedResource = &v1.ResourceRequirements{
		Limits: v1.ResourceList{
			v1.ResourceMemory: resource.MustParse("100Mi"),
			v1.ResourceCPU:    resource.MustParse("100m"),
		},
		Requests: v1.ResourceList{
			v1.ResourceMemory: resource.MustParse("100Mi"),
			v1.ResourceCPU:    resource.MustParse("100m"),
		},
	}
)
// This test suite verifies the scheduler priority functions of the default
// provider. Each spec first evens out node utilisation with
// createBalancedPodForNodes so that the priority under test is the factor
// that differs between nodes, then checks where a probe pod was placed.
var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
var cs clientset.Interface
var nodeList *v1.NodeList
// NOTE(review): systemPodsNo is never assigned inside this closure, so
// BeforeEach passes its zero value to WaitForPodsRunningReady.
var systemPodsNo int
var ns string
// NOTE(review): masterNodes is assigned in BeforeEach but not read by any
// spec in this closure.
var masterNodes sets.String
f := framework.NewDefaultFramework("sched-priority")
ignoreLabels := framework.ImagePullerLabels
AfterEach(func() {
})
// Refresh the per-spec state (client, namespace, node list) and wait for the
// cluster to settle before each spec runs.
BeforeEach(func() {
cs = f.ClientSet
ns = f.Namespace.Name
nodeList = &v1.NodeList{}
framework.WaitForAllNodesHealthy(cs, time.Minute)
masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)
err := framework.CheckTestingNSDeletedExcept(cs, ns)
framework.ExpectNoError(err)
err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
})
// A pod labelled security=S1 is started and its node is given a topology
// label. A second pod with a *preferred* anti-affinity term matching that
// label is then expected to land on a different node.
It("Pod should be schedule to node that don't match the PodAntiAffinity terms", func() {
By("Trying to launch a pod with a label to get a node which can launch it.")
pod := runPausePod(f, pausePodConfig{
Name: "pod-with-label-security-s1",
Labels: map[string]string{"security": "S1"},
})
nodeName := pod.Spec.NodeName
By("Trying to apply a label on the found node.")
k := fmt.Sprintf("kubernetes.io/e2e-%s", "node-topologyKey")
v := "topologyvalue"
framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
framework.ExpectNodeHasLabel(cs, nodeName, k, v)
defer framework.RemoveLabelOffNode(cs, nodeName, k)
// Make all nodes have the same CPU/memory usage ratio.
err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6)
framework.ExpectNoError(err)
By("Trying to launch the pod with podAntiAffinity.")
labelPodName := "pod-with-pod-antiaffinity"
pod = createPausePod(f, pausePodConfig{
Resources: podRequestedResource,
Name: labelPodName,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1", "value2"},
},
{
Key: "security",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"S2"},
}, {
Key: "security",
Operator: metav1.LabelSelectorOpExists,
},
},
},
TopologyKey: k,
Namespaces: []string{ns},
},
Weight: 10,
},
},
},
},
})
By("Wait the pod becomes running")
framework.ExpectNoError(f.WaitForPodRunning(pod.Name))
labelPod, err := cs.CoreV1().Pods(ns).Get(labelPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
By("Verify the pod was scheduled to the expected node.")
Expect(labelPod.Spec.NodeName).NotTo(Equal(nodeName))
})
// The first node is annotated with a PreferAvoidPods entry that names the
// test RC as controller. The RC is then scaled to len(nodes)-1 replicas and
// none of its pods is expected on the annotated node.
It("Pod should avoid to schedule to node that have avoidPod annotation", func() {
nodeName := nodeList.Items[0].Name
// Make all nodes have the same CPU/memory usage ratio.
err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.5)
framework.ExpectNoError(err)
By("Create a RC, with 0 replicas")
rc := createRC(ns, "scheduler-priority-avoid-pod", int32(0), map[string]string{"name": "scheduler-priority-avoid-pod"}, f, podRequestedResource)
// Clean up the replication controller when we are done.
defer func() {
// Delete the replication controller together with its pods; a failure
// here is only logged so it does not mask the spec result.
if err := framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, rc.Name); err != nil {
framework.Logf("Failed to cleanup replication controller %v: %v.", rc.Name, err)
}
}()
By("Trying to apply avoidPod annotations on the first node.")
avoidPod := v1.AvoidPods{
PreferAvoidPods: []v1.PreferAvoidPodsEntry{
{
PodSignature: v1.PodSignature{
PodController: &metav1.OwnerReference{
APIVersion: "v1",
Kind: "ReplicationController",
Name: rc.Name,
UID: rc.UID,
Controller: func() *bool { b := true; return &b }(),
},
},
Reason: "some reson",
Message: "some message",
},
},
}
// action applies the annotation; predicate recognises the node update that
// carries exactly the JSON encoding of avoidPod.
action := func() error {
framework.AddOrUpdateAvoidPodOnNode(cs, nodeName, avoidPod)
return nil
}
predicate := func(node *v1.Node) bool {
val, err := json.Marshal(avoidPod)
if err != nil {
return false
}
return node.Annotations[v1.PreferAvoidPodsAnnotationKey] == string(val)
}
success, err := common.ObserveNodeUpdateAfterAction(f, nodeName, predicate, action)
Expect(err).NotTo(HaveOccurred())
Expect(success).To(Equal(true))
// NOTE(review): this cleanup is registered only after the two Expect calls
// above, so it is skipped if either of them fails.
defer framework.RemoveAvoidPodsOffNode(cs, nodeName)
By(fmt.Sprintf("Scale the RC: %s to len(nodeList.Item)-1 : %v.", rc.Name, len(nodeList.Items)-1))
framework.ScaleRC(f.ClientSet, f.InternalClientset, ns, rc.Name, uint(len(nodeList.Items)-1), true)
testPods, err := cs.CoreV1().Pods(ns).List(metav1.ListOptions{
LabelSelector: "name=scheduler-priority-avoid-pod",
})
Expect(err).NotTo(HaveOccurred())
By(fmt.Sprintf("Verify the pods should not scheduled to the node: %s", nodeName))
for _, pod := range testPods.Items {
Expect(pod.Spec.NodeName).NotTo(Equal(nodeName))
}
})
// Phase 1: every node except the first gets 10 random taints and a pod
// without tolerations is expected on the first node.
// Phase 2: the first node gets 10 taints as well, and a pod tolerating
// exactly those taints is again expected on the first node.
It("Pod should perfer to scheduled to nodes pod can tolerate", func() {
// Make all nodes have the same CPU/memory usage ratio.
err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.5)
framework.ExpectNoError(err)
// Several taints are applied per node because a single matched toleration
// only counts as 1.
By("Trying to apply 10 taint on the nodes except first one.")
nodeName := nodeList.Items[0].Name
for index, node := range nodeList.Items {
if index == 0 {
continue
}
for i := 0; i < 10; i++ {
testTaint := addRandomTaitToNode(cs, node.Name)
// Deferred inside the loop on purpose: the taints must stay in place
// until the spec function returns.
defer framework.RemoveTaintOffNode(cs, node.Name, *testTaint)
}
}
By("Create a pod without any tolerations")
tolerationPodName := "without-tolerations"
pod := createPausePod(f, pausePodConfig{
Name: tolerationPodName,
})
framework.ExpectNoError(f.WaitForPodRunning(pod.Name))
By("Pod should prefer scheduled to the node don't have the taint.")
tolePod, err := cs.CoreV1().Pods(ns).Get(tolerationPodName, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
Expect(tolePod.Spec.NodeName).To(Equal(nodeName))
By("Trying to apply 10 taint on the first node.")
var tolerations []v1.Toleration
for i := 0; i < 10; i++ {
testTaint := addRandomTaitToNode(cs, nodeName)
// Collect a matching toleration for every taint added to the first node.
tolerations = append(tolerations, v1.Toleration{Key: testTaint.Key, Value: testTaint.Value, Effect: testTaint.Effect})
defer framework.RemoveTaintOffNode(cs, nodeName, *testTaint)
}
tolerationPodName = "with-tolerations"
By("Create a pod that tolerates all the taints of the first node.")
pod = createPausePod(f, pausePodConfig{
Name: tolerationPodName,
Tolerations: tolerations,
})
framework.ExpectNoError(f.WaitForPodRunning(pod.Name))
By("Pod should prefer scheduled to the node that pod can tolerate.")
tolePod, err = cs.CoreV1().Pods(ns).Get(tolerationPodName, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
Expect(tolePod.Spec.NodeName).To(Equal(nodeName))
})
})
// createBalancedPodForNodes starts one filler pod on every node so that all
// nodes end up with the same CPU and memory usage fraction.
//
// The target fraction is the largest of ratio and every node's current CPU and
// memory fraction as reported by computeCpuMemFraction. Each filler pod is
// pinned to its node by name and requests the difference between the target
// and the node's current fraction. The first StartPods error is returned.
func createBalancedPodForNodes(f *framework.Framework, cs clientset.Interface, ns string, nodes []v1.Node, requestedResource *v1.ResourceRequirements, ratio float64) error {
	// Pass 1: record each node's current fractions and track the maxima,
	// starting from the caller-supplied ratio.
	cpuByNode := map[string]float64{}
	memByNode := map[string]float64{}
	highestCPU, highestMem := ratio, ratio
	for i := range nodes {
		cpu, mem := computeCpuMemFraction(cs, nodes[i], requestedResource)
		cpuByNode[nodes[i].Name] = cpu
		memByNode[nodes[i].Name] = mem
		if cpu > highestCPU {
			highestCPU = cpu
		}
		if mem > highestMem {
			highestMem = mem
		}
	}

	// A single target is used for both resources so CPU and memory usage
	// rates stay equal to each other on every node.
	ratio = math.Max(highestCPU, highestMem)

	// Pass 2: start a filler pod per node that closes the gap to the target.
	for i := range nodes {
		name := nodes[i].Name
		allocatable := nodes[i].Status.Allocatable

		memQuantity, found := allocatable[v1.ResourceMemory]
		Expect(found).To(Equal(true))
		memBytes := memQuantity.Value()

		cpuQuantity, found := allocatable[v1.ResourceCPU]
		Expect(found).To(Equal(true))
		cpuMilli := cpuQuantity.MilliValue()

		missingCPU := int64((ratio - cpuByNode[name]) * float64(cpuMilli))
		missingMem := int64((ratio - memByNode[name]) * float64(memBytes))
		filler := v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(missingCPU, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(missingMem, resource.BinarySI),
		}

		podPrefix := string(uuid.NewUUID())
		fillerPod := initPausePod(f, pausePodConfig{
			Name:   "",
			Labels: balancePodLabel,
			Resources: &v1.ResourceRequirements{
				Limits:   filler,
				Requests: filler,
			},
			NodeName: name,
		})
		if err := testutils.StartPods(cs, 1, ns, podPrefix, *fillerPod, true, framework.Logf); err != nil {
			return err
		}
	}

	// Pass 3: recompute the fractions purely for their log output.
	for i := range nodes {
		By("Compute Cpu, Mem Fraction after create balanced pods.")
		computeCpuMemFraction(cs, nodes[i], requestedResource)
	}
	return nil
}
// computeCpuMemFraction returns the fraction of node's allocatable CPU and
// memory that would be requested if a pod with the given resource requests
// were added on top of the pods already assigned to the node.
//
// It lists the pods of all namespaces, keeps those whose Spec.NodeName equals
// node.Name, and sums their requests via getNonZeroRequests. The test is
// failed through framework.Failf if the pod list cannot be retrieved, and
// through Expect if the node reports no allocatable CPU or memory.
//
// The first return value is the CPU fraction, the second the memory fraction.
func computeCpuMemFraction(cs clientset.Interface, node v1.Node, resource *v1.ResourceRequirements) (float64, float64) {
	framework.Logf("ComputeCpuMemFraction for node: %v", node.Name)

	// Start from the requests of the pod that is about to be scheduled.
	totalRequestedCpuResource := resource.Requests.Cpu().MilliValue()
	totalRequestedMemResource := resource.Requests.Memory().Value()

	allpods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
	if err != nil {
		framework.Failf("Failed to list pods in all namespaces: %v", err)
	}
	for i := range allpods.Items {
		pod := &allpods.Items[i]
		if pod.Spec.NodeName != node.Name {
			continue
		}
		// Compute the pod's requests once and reuse them for logging and
		// for both running totals.
		requests := getNonZeroRequests(pod)
		framework.Logf("Pod for on the node: %v, Cpu: %v, Mem: %v", pod.Name, requests.MilliCPU, requests.Memory)
		totalRequestedCpuResource += requests.MilliCPU
		totalRequestedMemResource += requests.Memory
	}

	cpuAllocatable, found := node.Status.Allocatable[v1.ResourceCPU]
	Expect(found).To(Equal(true))
	cpuAllocatableMil := cpuAllocatable.MilliValue()
	cpuFraction := float64(totalRequestedCpuResource) / float64(cpuAllocatableMil)

	memAllocatable, found := node.Status.Allocatable[v1.ResourceMemory]
	Expect(found).To(Equal(true))
	memAllocatableVal := memAllocatable.Value()
	memFraction := float64(totalRequestedMemResource) / float64(memAllocatableVal)

	framework.Logf("Node: %v, totalRequestedCpuResource: %v, cpuAllocatableMil: %v, cpuFraction: %v", node.Name, totalRequestedCpuResource, cpuAllocatableMil, cpuFraction)
	framework.Logf("Node: %v, totalRequestedMemResource: %v, memAllocatableVal: %v, memFraction: %v", node.Name, totalRequestedMemResource, memAllocatableVal, memFraction)

	return cpuFraction, memFraction
}
// getNonZeroRequests sums, over every container of pod, the CPU and memory
// values that priorityutil.GetNonzeroRequests reports for the container's
// resource requests, and returns the totals as a Resource.
func getNonZeroRequests(pod *v1.Pod) Resource {
	var milliCPU, memory int64
	for idx := range pod.Spec.Containers {
		requests := &pod.Spec.Containers[idx].Resources.Requests
		containerCPU, containerMem := priorityutil.GetNonzeroRequests(requests)
		milliCPU += containerCPU
		memory += containerMem
	}
	return Resource{MilliCPU: milliCPU, Memory: memory}
}
// createRC creates a ReplicationController named rsName in namespace ns and
// returns the object handed back by the API server.
//
// The pod template carries rcPodLabels and a single container, also named
// rsName, that runs the pause image with the given resource requirements.
// A creation error fails the test through Expect.
func createRC(ns, rsName string, replicas int32, rcPodLabels map[string]string, f *framework.Framework, resource *v1.ResourceRequirements) *v1.ReplicationController {
	pauseContainer := v1.Container{
		Name:      rsName,
		Image:     framework.GetPauseImageName(f.ClientSet),
		Resources: *resource,
	}
	podTemplate := &v1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{Labels: rcPodLabels},
		Spec: v1.PodSpec{
			Containers: []v1.Container{pauseContainer},
		},
	}
	desired := &v1.ReplicationController{
		TypeMeta: metav1.TypeMeta{
			Kind:       "ReplicationController",
			APIVersion: "v1",
		},
		ObjectMeta: metav1.ObjectMeta{Name: rsName},
		Spec: v1.ReplicationControllerSpec{
			Replicas: &replicas,
			Template: podTemplate,
		},
	}

	created, err := f.ClientSet.CoreV1().ReplicationControllers(ns).Create(desired)
	Expect(err).NotTo(HaveOccurred())
	return created
}
// addRandomTaitToNode puts a PreferNoSchedule taint with a freshly generated
// UUID-based key and value on the named node, asserts that the node now
// carries it, and returns a pointer to the taint so callers can build a
// matching toleration or remove it again.
func addRandomTaitToNode(cs clientset.Interface, nodeName string) *v1.Taint {
	key := fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID()))
	value := fmt.Sprintf("testing-taint-value-%s", string(uuid.NewUUID()))
	taint := &v1.Taint{
		Key:    key,
		Value:  value,
		Effect: v1.TaintEffectPreferNoSchedule,
	}

	framework.AddOrUpdateTaintOnNode(cs, nodeName, *taint)
	framework.ExpectNodeHasTaint(cs, nodeName, taint)
	return taint
}

View file

@ -0,0 +1,132 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"fmt"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
// defaultTimeout is the Timeout that reserveCpu sets on the RCConfig it runs.
const defaultTimeout = 3 * time.Minute
// This test requires Rescheduler to be enabled.
//
// The spec fills the cluster with pods that together request all CPU, then
// scales the kubernetes-dashboard deployment in kube-system up by one replica
// and waits for the scale operation to complete.
var _ = SIGDescribe("Rescheduler [Serial]", func() {
	f := framework.NewDefaultFramework("rescheduler")
	var ns string
	var totalMillicores int

	BeforeEach(func() {
		framework.SkipUnlessProviderIs("gce", "gke")
		ns = f.Namespace.Name
		nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
		nodeCount := len(nodes.Items)
		Expect(nodeCount).NotTo(BeZero())

		// The cluster total is extrapolated from the first node's CPU
		// capacity, i.e. all nodes are treated as having the same size.
		cpu := nodes.Items[0].Status.Capacity[v1.ResourceCPU]
		totalMillicores = int((&cpu).MilliValue()) * nodeCount
	})

	It("should ensure that critical pod is scheduled in case there is no resources available", func() {
		By("reserving all available cpu")
		err := reserveAllCpu(f, "reserve-all-cpu", totalMillicores)
		// Registered before the error check so the RC is cleaned up even
		// when the reservation only partially succeeded.
		defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, ns, "reserve-all-cpu")
		framework.ExpectNoError(err)

		By("creating a new instance of Dashboard and waiting for Dashboard to be scheduled")
		label := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": "kubernetes-dashboard"}))
		listOpts := metav1.ListOptions{LabelSelector: label.String()}
		deployments, err := f.ClientSet.Extensions().Deployments(metav1.NamespaceSystem).List(listOpts)
		framework.ExpectNoError(err)
		Expect(len(deployments.Items)).Should(Equal(1))

		deployment := deployments.Items[0]
		replicas := uint(*(deployment.Spec.Replicas))

		err = framework.ScaleDeployment(f.ClientSet, f.InternalClientset, metav1.NamespaceSystem, deployment.Name, replicas+1, true)
		// Restore the original replica count when the spec finishes. The call
		// is wrapped in a closure because the arguments of a deferred call
		// are evaluated at the defer statement; deferring ExpectNoError
		// directly would run the scale-down right here.
		defer func() {
			framework.ExpectNoError(framework.ScaleDeployment(f.ClientSet, f.InternalClientset, metav1.NamespaceSystem, deployment.Name, replicas, true))
		}()
		framework.ExpectNoError(err)
	})
})
// reserveAllCpu runs an RC named id whose pods each request 100 millicores,
// scales it to millicores/100 replicas, and then polls every 10 seconds, for
// up to 5 minutes, until the namespace holds exactly that many pods and each
// of them is either running-and-ready or marked Unschedulable.
//
// It returns nil once that state is observed, the listing error if fetching
// the pods fails, and a timeout error otherwise.
func reserveAllCpu(f *framework.Framework, id string, millicores int) error {
	const (
		timeout      = 5 * time.Minute
		pollInterval = 10 * time.Second
	)
	replicas := millicores / 100

	// Create the RC with a single replica first, then scale it up without
	// waiting: some replicas are expected to stay unschedulable.
	reserveCpu(f, id, 1, 100)
	framework.ExpectNoError(framework.ScaleRC(f.ClientSet, f.InternalClientset, f.Namespace.Name, id, uint(replicas), false))

	start := time.Now()
	for time.Since(start) < timeout {
		pods, err := framework.GetPodsInNamespace(f.ClientSet, f.Namespace.Name, framework.ImagePullerLabels)
		if err != nil {
			return err
		}

		if len(pods) == replicas {
			settled := true
			for _, p := range pods {
				if !podRunningOrUnschedulable(p) {
					settled = false
					break
				}
			}
			if settled {
				return nil
			}
		}

		time.Sleep(pollInterval)
	}
	return fmt.Errorf("Pod name %s: Gave up waiting %v for %d pods to come up", id, timeout, replicas)
}
// podRunningOrUnschedulable reports whether pod has reached one of the two
// states reserveAllCpu waits for: its PodScheduled condition is False with
// reason "Unschedulable", or testutils.PodRunningReady reports it as running.
func podRunningOrUnschedulable(pod *v1.Pod) bool {
	if _, scheduled := podutil.GetPodCondition(&pod.Status, v1.PodScheduled); scheduled != nil {
		if scheduled.Status == v1.ConditionFalse && scheduled.Reason == "Unschedulable" {
			return true
		}
	}
	// The error from PodRunningReady is intentionally dropped; only the
	// boolean verdict matters here.
	isRunning, _ := testutils.PodRunningReady(pod)
	return isRunning
}
// reserveCpu runs an RC named id in the framework's namespace with the given
// number of pause-image replicas, splitting millicores evenly between them as
// each pod's CPU request. A failure to run the RC fails the test.
func reserveCpu(f *framework.Framework, id string, replicas, millicores int) {
	By(fmt.Sprintf("Running RC which reserves %v millicores", millicores))

	perPodRequest := int64(millicores / replicas)
	rcConfig := testutils.RCConfig{
		Client:         f.ClientSet,
		InternalClient: f.InternalClientset,
		Name:           id,
		Namespace:      f.Namespace.Name,
		Timeout:        defaultTimeout,
		Image:          framework.GetPauseImageName(f.ClientSet),
		Replicas:       replicas,
		CpuRequest:     perPodRequest,
	}
	framework.ExpectNoError(framework.RunRC(rcConfig))
}

View file

@ -0,0 +1,321 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
. "github.com/onsi/ginkgo"
_ "github.com/stretchr/testify/assert"
)
// getTestTaint returns the NoExecute taint used by the eviction tests in this
// file, stamped with the current time. Its key and value match the
// tolerations built by createPodForTaintsTest.
func getTestTaint() v1.Taint {
	var taint v1.Taint
	taint.Key = "kubernetes.io/e2e-evict-taint-key"
	taint.Value = "evictTaintVal"
	taint.Effect = v1.TaintEffectNoExecute
	taint.TimeAdded = metav1.Now()
	return taint
}
// createPodForTaintsTest builds (but does not submit) the pause pod used by
// the taint eviction tests.
//
// The pod is named podName in namespace ns, labelled name=podName, and has a
// deletion grace period of one second. Its tolerations depend on the flags:
//   - hasToleration == false: no tolerations;
//   - hasToleration == true and tolerationSeconds <= 0: tolerates the test
//     taint with no TolerationSeconds set, i.e. forever;
//   - hasToleration == true and tolerationSeconds > 0: tolerates the test
//     taint for tolerationSeconds seconds.
func createPodForTaintsTest(hasToleration bool, tolerationSeconds int, podName, ns string) *v1.Pod {
	grace := int64(1)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:                       podName,
			Namespace:                  ns,
			Labels:                     map[string]string{"name": podName},
			DeletionGracePeriodSeconds: &grace,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "pause",
					Image: "kubernetes/pause",
				},
			},
		},
	}
	if !hasToleration {
		return pod
	}

	// Key, value and effect mirror the taint produced by getTestTaint.
	toleration := v1.Toleration{
		Key:    "kubernetes.io/e2e-evict-taint-key",
		Value:  "evictTaintVal",
		Effect: v1.TaintEffectNoExecute,
	}
	if tolerationSeconds > 0 {
		// Bounded toleration; leaving TolerationSeconds nil tolerates forever.
		ts := int64(tolerationSeconds)
		toleration.TolerationSeconds = &ts
	}
	pod.Spec.Tolerations = []v1.Toleration{toleration}
	return pod
}
// createTestController starts an informer, in its own goroutine, that lists
// and watches pods in namespace ns restricted by a metadata.name=podName field
// selector. For every delete event it sees, it sends an empty struct on
// observedDeletions. The informer runs until stopCh is closed.
func createTestController(cs clientset.Interface, observedDeletions chan struct{}, stopCh chan struct{}, podName, ns string) {
	// The selector depends only on podName, so it is built once and shared
	// by the list and watch functions.
	podSelector := fields.SelectorFromSet(fields.Set{"metadata.name": podName}).String()

	listPods := func(options metav1.ListOptions) (runtime.Object, error) {
		options.FieldSelector = podSelector
		podList, err := cs.CoreV1().Pods(ns).List(options)
		return runtime.Object(podList), err
	}
	watchPods := func(options metav1.ListOptions) (watch.Interface, error) {
		options.FieldSelector = podSelector
		return cs.CoreV1().Pods(ns).Watch(options)
	}
	handlers := cache.ResourceEventHandlerFuncs{
		DeleteFunc: func(oldObj interface{}) { observedDeletions <- struct{}{} },
	}

	_, informer := cache.NewInformer(
		&cache.ListWatch{ListFunc: listPods, WatchFunc: watchPods},
		&v1.Pod{},
		0,
		handlers,
	)
	framework.Logf("Starting informer...")
	go informer.Run(stopCh)
}
// Wait budgets, in seconds, used by the NoExecuteTaintManager specs below.
// Both are untyped constants: they must be converted with
// time.Duration(x) * time.Second before being used as a timer duration.
const (
// KubeletPodDeletionDelaySeconds is the base number of seconds the specs
// wait for a pod deletion to be observed (or to not be observed).
// NOTE(review): presumably an upper bound on how long the kubelet takes to
// delete an evicted pod — confirm.
KubeletPodDeletionDelaySeconds = 60
// AdditionalWaitPerDeleteSeconds is the extra slack, in seconds, added on
// top of the base delay; multiples of it are also used as toleration times.
AdditionalWaitPerDeleteSeconds = 5
)
// Tests the behavior of NoExecuteTaintManager. Following scenarios are included:
// - eviction of non-tolerating pods from a tainted node,
// - lack of eviction of tolerating pods from a tainted node,
// - delayed eviction of short-tolerating pod from a tainted node,
// - lack of eviction of short-tolerating pod after taint removal.
//
// Every spec follows the same pattern: start an informer that reports
// deletions of the test pod, run the pod, taint the node it landed on, and
// then race a timer against the deletion channel. The informer is stopped by
// closing stopCh when the spec returns.
var _ = SIGDescribe("NoExecuteTaintManager [Serial]", func() {
	var cs clientset.Interface
	var ns string
	f := framework.NewDefaultFramework("taint-control")

	BeforeEach(func() {
		cs = f.ClientSet
		ns = f.Namespace.Name

		framework.WaitForAllNodesHealthy(cs, time.Minute)

		err := framework.CheckTestingNSDeletedExcept(cs, ns)
		framework.ExpectNoError(err)
	})

	// 1. Run a pod
	// 2. Taint the node running this pod with a no-execute taint
	// 3. See if pod will get evicted
	It("evicts pods from tainted nodes", func() {
		podName := "taint-eviction-1"
		pod := createPodForTaintsTest(false, 0, podName, ns)
		observedDeletions := make(chan struct{}, 100)
		stopCh := make(chan struct{})
		// Stop the informer goroutine when the spec ends.
		defer close(stopCh)
		createTestController(cs, observedDeletions, stopCh, podName, ns)

		By("Staring pod...")
		nodeName, err := testutils.RunPodAndGetNodeName(cs, pod, 2*time.Minute)
		framework.ExpectNoError(err)
		framework.Logf("Pod is running on %v. Tainting Node", nodeName)

		By("Trying to apply a taint on the Node")
		testTaint := getTestTaint()
		framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
		framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
		defer framework.RemoveTaintOffNode(cs, nodeName, testTaint)

		// Wait a bit
		By("Waiting for Pod to be deleted")
		timeoutChannel := time.NewTimer(time.Duration(KubeletPodDeletionDelaySeconds+AdditionalWaitPerDeleteSeconds) * time.Second).C
		select {
		case <-timeoutChannel:
			framework.Failf("Failed to evict Pod")
		case <-observedDeletions:
			framework.Logf("Noticed Pod eviction. Test successful")
		}
	})

	// 1. Run a pod with toleration
	// 2. Taint the node running this pod with a no-execute taint
	// 3. See if pod wont get evicted
	It("doesn't evict pod with tolerations from tainted nodes", func() {
		podName := "taint-eviction-2"
		pod := createPodForTaintsTest(true, 0, podName, ns)
		observedDeletions := make(chan struct{}, 100)
		stopCh := make(chan struct{})
		// Stop the informer goroutine when the spec ends.
		defer close(stopCh)
		createTestController(cs, observedDeletions, stopCh, podName, ns)

		By("Staring pod...")
		nodeName, err := testutils.RunPodAndGetNodeName(cs, pod, 2*time.Minute)
		framework.ExpectNoError(err)
		framework.Logf("Pod is running on %v. Tainting Node", nodeName)

		By("Trying to apply a taint on the Node")
		testTaint := getTestTaint()
		framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
		framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
		defer framework.RemoveTaintOffNode(cs, nodeName, testTaint)

		// Wait a bit; here the timer firing first is the success case.
		By("Waiting for Pod to be deleted")
		timeoutChannel := time.NewTimer(time.Duration(KubeletPodDeletionDelaySeconds+AdditionalWaitPerDeleteSeconds) * time.Second).C
		select {
		case <-timeoutChannel:
			framework.Logf("Pod wasn't evicted. Test successful")
		case <-observedDeletions:
			framework.Failf("Pod was evicted despite toleration")
		}
	})

	// 1. Run a pod with a finite toleration
	// 2. Taint the node running this pod with a no-execute taint
	// 3. See if pod wont get evicted before toleration time runs out
	// 4. See if pod will get evicted after toleration time runs out
	It("eventually evict pod with finite tolerations from tainted nodes", func() {
		podName := "taint-eviction-3"
		pod := createPodForTaintsTest(true, KubeletPodDeletionDelaySeconds+2*AdditionalWaitPerDeleteSeconds, podName, ns)
		observedDeletions := make(chan struct{}, 100)
		stopCh := make(chan struct{})
		// Stop the informer goroutine when the spec ends.
		defer close(stopCh)
		createTestController(cs, observedDeletions, stopCh, podName, ns)

		By("Staring pod...")
		nodeName, err := testutils.RunPodAndGetNodeName(cs, pod, 2*time.Minute)
		framework.ExpectNoError(err)
		framework.Logf("Pod is running on %v. Tainting Node", nodeName)

		By("Trying to apply a taint on the Node")
		testTaint := getTestTaint()
		framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
		framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
		defer framework.RemoveTaintOffNode(cs, nodeName, testTaint)

		// First window is shorter than the toleration: no eviction expected.
		By("Waiting to see if a Pod won't be deleted")
		timeoutChannel := time.NewTimer(time.Duration(KubeletPodDeletionDelaySeconds+AdditionalWaitPerDeleteSeconds) * time.Second).C
		select {
		case <-timeoutChannel:
			framework.Logf("Pod wasn't evicted")
		case <-observedDeletions:
			framework.Failf("Pod was evicted despite toleration")
			return
		}

		// Second window extends past the toleration: eviction expected.
		By("Waiting for Pod to be deleted")
		timeoutChannel = time.NewTimer(time.Duration(KubeletPodDeletionDelaySeconds+AdditionalWaitPerDeleteSeconds) * time.Second).C
		select {
		case <-timeoutChannel:
			framework.Failf("Pod wasn't evicted")
		case <-observedDeletions:
			framework.Logf("Pod was evicted after toleration time run out. Test successful")
		}
	})

	// 1. Run a pod with short toleration
	// 2. Taint the node running this pod with a no-execute taint
	// 3. Wait some time
	// 4. Remove the taint
	// 5. See if Pod won't be evicted.
	It("removing taint cancels eviction", func() {
		podName := "taint-eviction-4"
		pod := createPodForTaintsTest(true, 2*AdditionalWaitPerDeleteSeconds, podName, ns)
		observedDeletions := make(chan struct{}, 100)
		stopCh := make(chan struct{})
		// Stop the informer goroutine when the spec ends.
		defer close(stopCh)
		createTestController(cs, observedDeletions, stopCh, podName, ns)

		By("Staring pod...")
		nodeName, err := testutils.RunPodAndGetNodeName(cs, pod, 2*time.Minute)
		framework.ExpectNoError(err)
		framework.Logf("Pod is running on %v. Tainting Node", nodeName)

		By("Trying to apply a taint on the Node")
		testTaint := getTestTaint()
		framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
		framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
		// The taint is removed mid-spec on the happy path; the deferred
		// removal only covers early exits before that point.
		taintRemoved := false
		defer func() {
			if !taintRemoved {
				framework.RemoveTaintOffNode(cs, nodeName, testTaint)
			}
		}()

		// Wait half of the toleration time. The constant is a plain number
		// of seconds, so it must be converted to a Duration explicitly;
		// passing it directly would create a 5-nanosecond timer.
		By("Waiting short time to make sure Pod is queued for deletion")
		timeoutChannel := time.NewTimer(time.Duration(AdditionalWaitPerDeleteSeconds) * time.Second).C
		select {
		case <-timeoutChannel:
			framework.Logf("Pod wasn't evicted. Proceeding")
		case <-observedDeletions:
			framework.Failf("Pod was evicted despite toleration")
			return
		}

		framework.Logf("Removing taint from Node")
		framework.RemoveTaintOffNode(cs, nodeName, testTaint)
		taintRemoved = true

		By("Waiting some time to make sure that toleration time passed.")
		timeoutChannel = time.NewTimer(time.Duration(KubeletPodDeletionDelaySeconds+3*AdditionalWaitPerDeleteSeconds) * time.Second).C
		select {
		case <-timeoutChannel:
			framework.Logf("Pod wasn't evicted. Test successful")
		case <-observedDeletions:
			framework.Failf("Pod was evicted despite toleration")
		}
	})
})