Replace godep with dep

This commit is contained in:
Manuel de Brito Fontes 2017-10-06 17:26:14 -03:00
parent 1e7489927c
commit bf5616c65b
14883 changed files with 3937406 additions and 361781 deletions

View file

@ -0,0 +1,56 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"scheduler_interface.go",
"types.go",
"well_known_labels.go",
],
deps = [
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"scheduler_interface_test.go",
"types_test.go",
],
library = ":go_default_library",
deps = [
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//plugin/pkg/scheduler/algorithm/predicates:all-srcs",
"//plugin/pkg/scheduler/algorithm/priorities:all-srcs",
],
tags = ["automanaged"],
)

View file

@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package scheduler contains a generic Scheduler interface and several
// implementations.
package algorithm // import "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"

View file

@ -0,0 +1,72 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"error.go",
"metadata.go",
"predicates.go",
"utils.go",
],
deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/api/v1/helper/qos:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/volume/util:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//plugin/pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/k8s.io/metrics/pkg/client/clientset_generated/clientset:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"metadata_test.go",
"predicates_test.go",
"utils_test.go",
],
library = ":go_default_library",
deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//plugin/pkg/scheduler/testing:go_default_library",
"//plugin/pkg/scheduler/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View file

@ -0,0 +1,114 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
)
var (
// The predicateName tries to be consistent as the predicate name used in DefaultAlgorithmProvider defined in
// defaults.go (which tend to be stable for backward compatibility)
// NOTE: If you add a new predicate failure error for a predicate that can never
// be made to pass by removing pods, or you change an existing predicate so that
// it can never be made to pass by removing pods, you need to add the predicate
// failure error in nodesWherePreemptionMightHelp() in scheduler/core/generic_scheduler.go
ErrDiskConflict = newPredicateFailureError("NoDiskConflict")
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict")
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector")
ErrPodAffinityNotMatch = newPredicateFailureError("MatchInterPodAffinity")
ErrTaintsTolerationsNotMatch = newPredicateFailureError("PodToleratesNodeTaints")
ErrPodNotMatchHostName = newPredicateFailureError("HostName")
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts")
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence")
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity")
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount")
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure")
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure")
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk")
ErrNodeNotReady = newPredicateFailureError("NodeNotReady")
ErrNodeNetworkUnavailable = newPredicateFailureError("NodeNetworkUnavailable")
ErrNodeUnschedulable = newPredicateFailureError("NodeUnschedulable")
ErrNodeUnknownCondition = newPredicateFailureError("NodeUnknownCondition")
ErrVolumeNodeConflict = newPredicateFailureError("NoVolumeNodeConflict")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.
ErrFakePredicate = newPredicateFailureError("FakePredicateError")
)
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.requested - (e.capacity - e.used)
}
type PredicateFailureError struct {
PredicateName string
}
func newPredicateFailureError(predicateName string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
func (e *PredicateFailureError) GetReason() string {
return e.PredicateName
}
type FailureReason struct {
reason string
}
func NewFailureReason(msg string) *FailureReason {
return &FailureReason{reason: msg}
}
func (e *FailureReason) GetReason() string {
return e.reason
}

View file

@ -0,0 +1,188 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"sync"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
schedutil "k8s.io/kubernetes/plugin/pkg/scheduler/util"
"github.com/golang/glog"
)
type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
// due to the way declarations are processed in predicate declaration unit tests.
type matchingPodAntiAffinityTerm struct {
term *v1.PodAffinityTerm
node *v1.Node
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
pod *v1.Pod
podBestEffort bool
podRequest *schedulercache.Resource
podPorts map[int]bool
//key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
}
// Ensure that predicateMetadata implements algorithm.PredicateMetadata.
var _ algorithm.PredicateMetadata = &predicateMetadata{}
// PredicateMetadataProducer: Helper types/variables...
type PredicateMetadataProducer func(pm *predicateMetadata)
var predicateMetaProducerRegisterLock sync.Mutex
var predicateMetadataProducers map[string]PredicateMetadataProducer = make(map[string]PredicateMetadataProducer)
func RegisterPredicateMetadataProducer(predicateName string, precomp PredicateMetadataProducer) {
predicateMetaProducerRegisterLock.Lock()
defer predicateMetaProducerRegisterLock.Unlock()
predicateMetadataProducers[predicateName] = precomp
}
func NewPredicateMetadataFactory(podLister algorithm.PodLister) algorithm.PredicateMetadataProducer {
factory := &PredicateMetadataFactory{
podLister,
}
return factory.GetMetadata
}
// GetMetadata returns the predicateMetadata used which will be used by various predicates.
func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInfoMap map[string]*schedulercache.NodeInfo) algorithm.PredicateMetadata {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetUsedPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same.")
}
// Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
if meta.serviceAffinityInUse &&
len(meta.serviceAffinityMatchingPodList) > 0 &&
deletedPod.Namespace == meta.serviceAffinityMatchingPodList[0].Namespace {
for i, pod := range meta.serviceAffinityMatchingPodList {
if schedutil.GetPodFullName(pod) == deletedPodFullName {
meta.serviceAffinityMatchingPodList = append(
meta.serviceAffinityMatchingPodList[:i],
meta.serviceAffinityMatchingPodList[i+1:]...)
break
}
}
}
return nil
}
// AddPod changes predicateMetadata assuming that `newPod` is added to the
// system.
func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache.NodeInfo) error {
addedPodFullName := schedutil.GetPodFullName(addedPod)
if addedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("addedPod and meta.pod must not be the same.")
}
if nodeInfo.Node() == nil {
return fmt.Errorf("Invalid node in nodeInfo.")
}
// Add matching anti-affinity terms of the addedPod to the map.
podMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(meta.pod, addedPod, nodeInfo.Node())
if err != nil {
return err
}
if len(podMatchingTerms) > 0 {
existingTerms, found := meta.matchingAntiAffinityTerms[addedPodFullName]
if found {
meta.matchingAntiAffinityTerms[addedPodFullName] = append(existingTerms,
podMatchingTerms...)
} else {
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
// of matching pods if applicable.
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
selector := CreateSelectorFromLabels(meta.pod.Labels)
if selector.Matches(labels.Set(addedPod.Labels)) {
meta.serviceAffinityMatchingPodList = append(meta.serviceAffinityMatchingPodList,
addedPod)
}
}
return nil
}
// ShallowCopy copies a metadata struct into a new struct and creates a copy of
// its maps and slices, but it does not copy the contents of pointer values.
func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
newPredMeta := &predicateMetadata{
pod: meta.pod,
podBestEffort: meta.podBestEffort,
podRequest: meta.podRequest,
serviceAffinityInUse: meta.serviceAffinityInUse,
}
newPredMeta.podPorts = map[int]bool{}
for k, v := range meta.podPorts {
newPredMeta.podPorts[k] = v
}
newPredMeta.matchingAntiAffinityTerms = map[string][]matchingPodAntiAffinityTerm{}
for k, v := range meta.matchingAntiAffinityTerms {
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
}
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
meta.serviceAffinityMatchingPodList...)
return (algorithm.PredicateMetadata)(newPredMeta)
}

View file

@ -0,0 +1,400 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing"
)
// sortableAntiAffinityTerms lets us to sort anti-affinity terms.
type sortableAntiAffinityTerms []matchingPodAntiAffinityTerm
// Less establishes some ordering between two matchingPodAntiAffinityTerms for
// sorting.
func (s sortableAntiAffinityTerms) Less(i, j int) bool {
t1, t2 := s[i], s[j]
if t1.node.Name != t2.node.Name {
return t1.node.Name < t2.node.Name
}
if len(t1.term.Namespaces) != len(t2.term.Namespaces) {
return len(t1.term.Namespaces) < len(t2.term.Namespaces)
}
if t1.term.TopologyKey != t2.term.TopologyKey {
return t1.term.TopologyKey < t2.term.TopologyKey
}
if len(t1.term.LabelSelector.MatchLabels) != len(t2.term.LabelSelector.MatchLabels) {
return len(t1.term.LabelSelector.MatchLabels) < len(t2.term.LabelSelector.MatchLabels)
}
return false
}
func (s sortableAntiAffinityTerms) Len() int { return len(s) }
func (s sortableAntiAffinityTerms) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
var _ = sort.Interface(sortableAntiAffinityTerms{})
func sortAntiAffinityTerms(terms map[string][]matchingPodAntiAffinityTerm) {
for k, v := range terms {
sortableTerms := sortableAntiAffinityTerms(v)
sort.Sort(sortableTerms)
terms[k] = sortableTerms
}
}
// sortablePods lets us to sort pods.
type sortablePods []*v1.Pod
func (s sortablePods) Less(i, j int) bool {
return s[i].Namespace < s[j].Namespace ||
(s[i].Namespace == s[j].Namespace && s[i].Name < s[j].Name)
}
func (s sortablePods) Len() int { return len(s) }
func (s sortablePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortablePods{})
// sortableServices allows us to sort services.
type sortableServices []*v1.Service
func (s sortableServices) Less(i, j int) bool {
return s[i].Namespace < s[j].Namespace ||
(s[i].Namespace == s[j].Namespace && s[i].Name < s[j].Name)
}
func (s sortableServices) Len() int { return len(s) }
func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{})
// predicateMetadataEquivalent returns true if the two metadata are equivalent.
// Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
if !reflect.DeepEqual(meta1.pod, meta2.pod) {
return fmt.Errorf("pods are not the same.")
}
if meta1.podBestEffort != meta2.podBestEffort {
return fmt.Errorf("podBestEfforts are not equal.")
}
if meta1.serviceAffinityInUse != meta1.serviceAffinityInUse {
return fmt.Errorf("serviceAffinityInUses are not equal.")
}
if len(meta1.podPorts) != len(meta2.podPorts) {
return fmt.Errorf("podPorts are not equal.")
}
for !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) {
return fmt.Errorf("podPorts are not equal.")
}
sortAntiAffinityTerms(meta1.matchingAntiAffinityTerms)
sortAntiAffinityTerms(meta2.matchingAntiAffinityTerms)
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
return fmt.Errorf("matchingAntiAffinityTerms are not euqal.")
}
if meta1.serviceAffinityInUse {
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
sort.Sort(sortablePods1)
sortablePods2 := sortablePods(meta2.serviceAffinityMatchingPodList)
sort.Sort(sortablePods2)
if !reflect.DeepEqual(sortablePods1, sortablePods2) {
return fmt.Errorf("serviceAffinityMatchingPodLists are not euqal.")
}
sortableServices1 := sortableServices(meta1.serviceAffinityMatchingPodServices)
sort.Sort(sortableServices1)
sortableServices2 := sortableServices(meta2.serviceAffinityMatchingPodServices)
sort.Sort(sortableServices2)
if !reflect.DeepEqual(sortableServices1, sortableServices2) {
return fmt.Errorf("serviceAffinityMatchingPodServices are not euqal.")
}
}
return nil
}
func TestPredicateMetadata_AddRemovePod(t *testing.T) {
var label1 = map[string]string{
"region": "r1",
"zone": "z11",
}
var label2 = map[string]string{
"region": "r1",
"zone": "z12",
}
var label3 = map[string]string{
"region": "r2",
"zone": "z21",
}
selector1 := map[string]string{"foo": "bar"}
antiAffinityFooBar := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar"},
},
},
},
TopologyKey: "region",
},
},
}
antiAffinityComplex := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
tests := []struct {
description string
pendingPod *v1.Pod
addedPod *v1.Pod
existingPods []*v1.Pod
nodes []*v1.Node
services []*v1.Service
}{
{
description: "no anti-affinity or service affinity exist",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeB"},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeB",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata service-affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeB"},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
}
for _, test := range tests {
allPodLister := schedulertesting.FakePodLister(append(test.existingPods, test.addedPod))
// getMeta creates predicate meta data given the list of pods.
getMeta := func(lister schedulertesting.FakePodLister) (*predicateMetadata, map[string]*schedulercache.NodeInfo) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(lister, test.nodes)
// nodeList is a list of non-pointer nodes to feed to FakeNodeListInfo.
nodeList := []v1.Node{}
for _, n := range test.nodes {
nodeList = append(nodeList, *n)
}
_, precompute := NewServiceAffinityPredicate(lister, schedulertesting.FakeServiceLister(test.services), FakeNodeListInfo(nodeList), nil)
RegisterPredicateMetadataProducer("ServiceAffinityMetaProducer", precompute)
pmf := PredicateMetadataFactory{lister}
meta := pmf.GetMetadata(test.pendingPod, nodeInfoMap)
return meta.(*predicateMetadata), nodeInfoMap
}
// allPodsMeta is meta data produced when all pods, including test.addedPod
// are given to the metadata producer.
allPodsMeta, _ := getMeta(allPodLister)
// existingPodsMeta1 is meta data produced for test.existingPods (without test.addedPod).
existingPodsMeta1, nodeInfoMap := getMeta(schedulertesting.FakePodLister(test.existingPods))
// Add test.addedPod to existingPodsMeta1 and make sure meta is equal to allPodsMeta
nodeInfo := nodeInfoMap[test.addedPod.Spec.NodeName]
if err := existingPodsMeta1.AddPod(test.addedPod, nodeInfo); err != nil {
t.Errorf("test [%v]: error adding pod to meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(allPodsMeta, existingPodsMeta1); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
// Remove the added pod and from existingPodsMeta1 an make sure it is equal
// to meta generated for existing pods.
existingPodsMeta2, _ := getMeta(schedulertesting.FakePodLister(test.existingPods))
if err := existingPodsMeta1.RemovePod(test.addedPod); err != nil {
t.Errorf("test [%v]: error removing pod from meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(existingPodsMeta1, existingPodsMeta2); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
}
}
// TestPredicateMetadata_ShallowCopy tests the ShallowCopy function. It is based
// on the idea that shallow-copy should produce an object that is deep-equal to the original
// object.
func TestPredicateMetadata_ShallowCopy(t *testing.T) {
source := predicateMetadata{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "testns",
},
},
podBestEffort: true,
podRequest: &schedulercache.Resource{
MilliCPU: 1000,
Memory: 300,
AllowedPodNumber: 4,
},
podPorts: map[int]bool{1234: true, 456: false},
matchingAntiAffinityTerms: map[string][]matchingPodAntiAffinityTerm{
"term1": {
{
term: &v1.PodAffinityTerm{TopologyKey: "node"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
},
},
},
},
serviceAffinityInUse: true,
serviceAffinityMatchingPodList: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "pod2"}},
},
serviceAffinityMatchingPodServices: []*v1.Service{
{ObjectMeta: metav1.ObjectMeta{Name: "service1"}},
},
}
if !reflect.DeepEqual(source.ShallowCopy().(*predicateMetadata), &source) {
t.Errorf("Copy is not equal to source!")
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
func FindLabelsInSet(labelsToKeep []string, selector labels.Set) map[string]string {
aL := make(map[string]string)
for _, l := range labelsToKeep {
if selector.Has(l) {
aL[l] = selector.Get(l)
}
}
return aL
}
// AddUnsetLabelsToMap backfills missing values with values we find in a map.
func AddUnsetLabelsToMap(aL map[string]string, labelsToAdd []string, labelSet labels.Set) {
for _, l := range labelsToAdd {
// if the label is already there, dont overwrite it.
if _, exists := aL[l]; exists {
continue
}
// otherwise, backfill this label.
if labelSet.Has(l) {
aL[l] = labelSet.Get(l)
}
}
}
// FilterPodsByNamespace filters pods outside a namespace from the given list.
func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
filtered := []*v1.Pod{}
for _, nsPod := range pods {
if nsPod.Namespace == ns {
filtered = append(filtered, nsPod)
}
}
return filtered
}
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if aL == nil || len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
}
// GetEquivalencePod returns a EquivalencePod which contains a group of pod attributes which can be reused.
func GetEquivalencePod(pod *v1.Pod) interface{} {
// For now we only consider pods:
// 1. OwnerReferences is Controller
// 2. with same OwnerReferences
// to be equivalent
if len(pod.OwnerReferences) != 0 {
for _, ref := range pod.OwnerReferences {
if *ref.Controller {
// a pod can only belongs to one controller
return &EquivalencePod{
ControllerRef: ref,
}
}
}
}
return nil
}
// EquivalencePod is a group of pod attributes which can be reused as equivalence to schedule other pods.
type EquivalencePod struct {
ControllerRef metav1.OwnerReference
}

View file

@ -0,0 +1,70 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)
// ExampleUtils is a https://blog.golang.org/examples styled unit test.
func ExampleFindLabelsInSet() {
labelSubset := labels.Set{}
labelSubset["label1"] = "value1"
labelSubset["label2"] = "value2"
// Lets make believe that these pods are on the cluster.
// Utility functions will inspect their labels, filter them, and so on.
nsPods := []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
Labels: map[string]string{
"label1": "wontSeeThis",
"label2": "wontSeeThis",
"label3": "will_see_this",
},
},
}, // first pod which will be used via the utilities
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
Namespace: "ns1",
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod3ThatWeWontSee",
},
},
}
fmt.Println(FindLabelsInSet([]string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)["label3"])
AddUnsetLabelsToMap(labelSubset, []string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)
fmt.Println(labelSubset)
for _, pod := range FilterPodsByNamespace(nsPods, "ns1") {
fmt.Print(pod.Name, ",")
}
// Output:
// will_see_this
// label1=value1,label2=value2,label3=will_see_this
// pod1,pod2,
}

View file

@ -0,0 +1,87 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"balanced_resource_allocation.go",
"image_locality.go",
"interpod_affinity.go",
"least_requested.go",
"metadata.go",
"most_requested.go",
"node_affinity.go",
"node_label.go",
"node_prefer_avoid_pods.go",
"selector_spreading.go",
"taint_toleration.go",
"test_util.go",
],
deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/util/node:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"balanced_resource_allocation_test.go",
"image_locality_test.go",
"interpod_affinity_test.go",
"least_requested_test.go",
"metadata_test.go",
"most_requested_test.go",
"node_affinity_test.go",
"node_label_test.go",
"node_prefer_avoid_pods_test.go",
"selector_spreading_test.go",
"taint_toleration_test.go",
],
library = ":go_default_library",
deps = [
"//pkg/kubelet/apis:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//plugin/pkg/scheduler/testing:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//plugin/pkg/scheduler/algorithm/priorities/util:all-srcs",
],
tags = ["automanaged"],
)

View file

@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"math"
"k8s.io/api/core/v1"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
)
// Also used in most/least_requested nad metadata.
// TODO: despaghettify it
func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
result := &schedulercache.Resource{}
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
result.MilliCPU += cpu
result.Memory += memory
}
return result
}
func calculateBalancedResourceAllocation(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
score := int(0)
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
score = 0
} else {
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multilying the absolute value of the difference by 10 scales the value to
// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
score = int((1 - diff) * float64(schedulerapi.MaxPriority))
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
score,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
// BalancedResourceAllocationMap favors nodes with balanced resource usage rate.
// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fracion of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadatat - fallback to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}

View file

@ -0,0 +1,264 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestBalancedResourceAllocation(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0 %
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 3000 / 4000= 75%
Memory Fraction: 5000 / 10000 = 50%
Node1 Score: 10 - (0.75-0.5)*10 = 7
Node2 scores on 0-10 scale
CPU Fraction: 3000 / 6000= 50%
Memory Fraction: 5000/10000 = 50%
Node2 Score: 10 - (0.5-0.5)*10 = 10
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 0 / 20000 = 0%
Node1 Score: 10 - (0.6-0)*10 = 4
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node2 Score: 10 - (0.6-0.25)*10 = 6
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 20000 = 50%
Node2 Score: 10 - (0.6-0.5)*10 = 9
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 50000 = 20%
Node2 Score: 10 - (0.6-0.2)*10 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 0 / 10000 = 0
Node1 Score: 0
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction 5000 / 10000 = 50%
Node2 Score: 0
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,79 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// ImageLocalityPriorityMap is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var sumSize int64
for i := range pod.Spec.Containers {
sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: calculateScoreFromSize(sumSize),
}, nil
}
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
var score int
switch {
case sumSize == 0 || sumSize < minImgSize:
// score == 0 means none of the images required by this pod are present on this
// node or the total size of the images present is too small to be taken into further consideration.
score = 0
// If existing images' total size is larger than max, just make it highest priority.
case sumSize >= maxImgSize:
score = schedulerapi.MaxPriority
default:
score = int((int64(schedulerapi.MaxPriority) * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
}
// Return which bucket the given size belongs to
return score
}
// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *v1.Node, container *v1.Container) int64 {
for _, image := range node.Status.Images {
for _, name := range image.Names {
if container.Image == name {
// Should return immediately.
return image.SizeBytes
}
}
}
return 0
}

View file

@ -0,0 +1,183 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestImageLocalityPriority(t *testing.T) {
test_40_250 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/250",
},
},
}
test_40_140 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
},
},
}
test_min_max := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/10",
},
{
Image: "gcr.io/2000",
},
},
}
node_40_140_2000 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/40",
"gcr.io/40:v1",
"gcr.io/40:v1",
},
SizeBytes: int64(40 * mb),
},
{
Names: []string{
"gcr.io/140",
"gcr.io/140:v1",
},
SizeBytes: int64(140 * mb),
},
{
Names: []string{
"gcr.io/2000",
},
SizeBytes: int64(2000 * mb),
},
},
}
node_250_10 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/250",
},
SizeBytes: int64(250 * mb),
},
{
Names: []string{
"gcr.io/10",
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
// Pod: gcr.io/40 gcr.io/250
// Node1
// Image: gcr.io/40 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Node2
// Image: gcr.io/250 250MB
// Score: (250M-23M)/97.7M + 1 = 3
pod: &v1.Pod{Spec: test_40_250},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
test: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Node2
// Image: not present
// Score: 0
pod: &v1.Pod{Spec: test_40_140},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
test: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10
// Node1
// Image: gcr.io/2000 2000MB
// Score: 2000 > max score = 10
// Node2
// Image: gcr.io/10 10MB
// Score: 10 < min score = 0
pod: &v1.Pod{Spec: test_min_max},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "if exceed limit, use limit",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func makeImageNode(node string, status v1.NodeStatus) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: status,
}
}

View file

@ -0,0 +1,238 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"strings"
"sync"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
type InterPodAffinity struct {
info predicates.NodeInfo
nodeLister algorithm.NodeLister
podLister algorithm.PodLister
hardPodAffinityWeight int
}
func NewInterPodAffinityPriority(
info predicates.NodeInfo,
nodeLister algorithm.NodeLister,
podLister algorithm.PodLister,
hardPodAffinityWeight int) algorithm.PriorityFunction {
interPodAffinity := &InterPodAffinity{
info: info,
nodeLister: nodeLister,
podLister: podLister,
hardPodAffinityWeight: hardPodAffinityWeight,
}
return interPodAffinity.CalculateInterPodAffinityPriority
}
type podAffinityPriorityMap struct {
sync.Mutex
// nodes contain all nodes that should be considered
nodes []*v1.Node
// counts store the mapping from node name to so-far computed score of
// the node.
counts map[string]float64
// failureDomains contain default failure domains keys
failureDomains priorityutil.Topologies
// The first error that we faced.
firstError error
}
func newPodAffinityPriorityMap(nodes []*v1.Node) *podAffinityPriorityMap {
return &podAffinityPriorityMap{
nodes: nodes,
counts: make(map[string]float64, len(nodes)),
failureDomains: priorityutil.Topologies{DefaultKeys: strings.Split(kubeletapis.DefaultFailureDomains, ",")},
}
}
func (p *podAffinityPriorityMap) setError(err error) {
p.Lock()
defer p.Unlock()
if p.firstError == nil {
p.firstError = err
}
}
func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, weight float64) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(podDefiningAffinityTerm, term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
p.setError(err)
return
}
match := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, namespaces, selector)
if match {
func() {
p.Lock()
defer p.Unlock()
for _, node := range p.nodes {
if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
p.counts[node.Name] += weight
}
}
}()
}
}
func (p *podAffinityPriorityMap) processTerms(terms []v1.WeightedPodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, multiplier int) {
for i := range terms {
term := &terms[i]
p.processTerm(&term.PodAffinityTerm, podDefiningAffinityTerm, podToCheck, fixedNode, float64(term.Weight*int32(multiplier)))
}
}
// CalculateInterPodAffinityPriority compute a sum by iterating through the elements of weightedPodAffinityTerm and adding
// "weight" to the sum if the corresponding PodAffinityTerm is satisfied for
// that node; the node(s) with the highest sum are the most preferred.
// Symmetry need to be considered for preferredDuringSchedulingIgnoredDuringExecution from podAffinity & podAntiAffinity,
// symmetry need to be considered for hard requirements from podAffinity
func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
affinity := pod.Spec.Affinity
hasAffinityConstraints := affinity != nil && affinity.PodAffinity != nil
hasAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil
allNodeNames := make([]string, 0, len(nodeNameToInfo))
for name := range nodeNameToInfo {
allNodeNames = append(allNodeNames, name)
}
// convert the topology key based weights to the node name based weights
var maxCount float64
var minCount float64
// priorityMap stores the mapping from node name to so-far computed score of
// the node.
pm := newPodAffinityPriorityMap(nodes)
processPod := func(existingPod *v1.Pod) error {
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
return err
}
existingPodAffinity := existingPod.Spec.Affinity
existingHasAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAffinity != nil
existingHasAntiAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAntiAffinity != nil
if hasAffinityConstraints {
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPods>`s node by the term`s weight.
terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, 1)
}
if hasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>`s node by the term`s weight.
terms := affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, -1)
}
if existingHasAffinityConstraints {
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>
if ipa.hardPodAffinityWeight > 0 {
terms := existingPodAffinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
for _, term := range terms {
pm.processTerm(&term, existingPod, pod, existingPodNode, float64(ipa.hardPodAffinityWeight))
}
}
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, 1)
}
if existingHasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, -1)
}
return nil
}
processNode := func(i int) {
nodeInfo := nodeNameToInfo[allNodeNames[i]]
if hasAffinityConstraints || hasAntiAffinityConstraints {
// We need to process all the nodes.
for _, existingPod := range nodeInfo.Pods() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
} else {
// The pod doesn't have any constraints - we need to check only existing
// ones that have some.
for _, existingPod := range nodeInfo.PodsWithAffinity() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
if pm.firstError != nil {
return nil, pm.firstError
}
for _, node := range nodes {
if pm.counts[node.Name] > maxCount {
maxCount = pm.counts[node.Name]
}
if pm.counts[node.Name] < minCount {
minCount = pm.counts[node.Name]
}
}
// calculate final priority score for each node
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for _, node := range nodes {
fScore := float64(0)
if (maxCount - minCount) > 0 {
fScore = float64(schedulerapi.MaxPriority) * ((pm.counts[node.Name] - minCount) / (maxCount - minCount))
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof("%v -> %v: InterPodAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
}
}
return result, nil
}

View file

@ -0,0 +1,615 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing"
)
type FakeNodeListInfo []*v1.Node
func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
for _, node := range nodes {
if node.Name == nodeName {
return node, nil
}
}
return nil, fmt.Errorf("Unable to find node: %s", nodeName)
}
func TestInterPodAffinityPriority(t *testing.T) {
labelRgChina := map[string]string{
"region": "China",
}
labelRgIndia := map[string]string{
"region": "India",
}
labelAzAz1 := map[string]string{
"az": "az1",
}
labelAzAz2 := map[string]string{
"az": "az2",
}
labelRgChinaAzAz1 := map[string]string{
"region": "China",
"az": "az1",
}
podLabelSecurityS1 := map[string]string{
"security": "S1",
}
podLabelSecurityS2 := map[string]string{
"security": "S2",
}
// considered only preferredDuringSchedulingIgnoredDuringExecution in pod affinity
stayWithS1InRegion := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
},
}
stayWithS2InRegion := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 6,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "region",
},
},
},
},
}
affinity3 := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 8,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"S1"},
}, {
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "region",
},
}, {
Weight: 2,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
},
},
},
},
}
hardAffinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1", "value2"},
},
},
},
TopologyKey: "region",
}, {
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
},
},
},
}
awayFromS1InAz := &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "az",
},
},
},
},
}
// to stay away from security S2 in any az.
awayFromS2InAz := &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "az",
},
},
},
},
}
// to stay with security S1 in same region, stay away from security S2 in any az.
stayWithS1InRegionAwayFromS2InAz := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 8,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "az",
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as Affinity is nil",
},
// the node(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score
// the node(machine3) that don't have the label {"region": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get low score
// the node(machine2) that have the label {"region": "China"} (match the topology key) but that have existing pods that mismatch the labelSelector get low score
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Affinity: pod that matches topology key & pods in nodes will get high score comparing to others" +
"which doesn't match either pods in nodes or in topology key",
},
// the node1(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score
// the node2(machine2) that have the label {"region": "China"}, match the topology key and have the same label value with node1, get the same high score with node1
// the node3(machine3) that have the label {"region": "India"}, match the topology key but have a different label value, don't have existing pods that match the labelSelector,
// get a low score.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "All the nodes that have the same topology key & label value with one of them has an existing pod that match the affinity rules, have the same score",
},
// there are 2 regions, say regionChina(machine1,machine3,machine4) and regionIndia(machine2,machine5), both regions have nodes that match the preference.
// But there are more nodes(actually more existing pods) in regionChina that match the preference than regionIndia.
// Then, nodes in regionChina get higher score than nodes in regionIndia, and all the nodes in regionChina should get a same score(high score),
// while all the nodes in regionIndia should get another same score(low score).
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS2InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine4"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine5"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 5}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 5}},
test: "Affinity: nodes in one region has more matching pods comparing to other reqion, so the region which has more macthes will get high score",
},
// Test with the different operators and values for pod affinity scheduling preference, including some match failures.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: affinity3}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity: different Label operators and values for pod affinity scheduling preference, including some match failures ",
},
// Test the symmetry cases for affinity, the difference between affinity and symmetry is not the pod wants to run together with some existing pods,
// but the existing pods have the inter pod affinity preference while the pod to schedule satisfy the preference.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: stayWithS2InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considred only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardAffinity}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardAffinity}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considred RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
// The pod to schedule prefer to stay away from some existing pods at node level using the pod anti affinity.
// the nodes that have the label {"node": "bar"} (match the topology key) and that have existing pods that match the labelSelector get low score
// the nodes that don't have the label {"node": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get high score
// the nodes that have the label {"node": "bar"} (match the topology key) but that have existing pods that mismatch the labelSelector get high score
// there are 2 nodes, say node1 and node2, both nodes have pods that match the labelSelector and have topology-key in node.Labels.
// But there are more pods on node1 that match the preference than node2. Then, node1 get a lower score than node2.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that doesnot match existing pods in node will get high score ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that does not matches topology key & matches the pods in nodes will get higher score comparing to others ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: one node has more matching pods comparing to other node, so the node which has more unmacthes will get high score",
},
// Test the symmetry cases for anti affinity
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: awayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
},
// Test both affinity and anti-affinity
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity",
},
// Combined cases considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels (they are in the same RC/service),
// the pod prefer to run together with its brother pods in the same region, but wants to stay away from them at node level,
// so that all the pods of a RC/service can stay in a same region but trying to separate with each other
// machine-1,machine-3,machine-4 are in ChinaRegion others machin-2,machine-5 are in IndiaRegion
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine4"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine5"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChinaAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 4}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 4}},
test: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels",
},
// Consider Affinity, Anti Affinity and symmetry together.
// for Affinity, the weights are: 8, 0, 0, 0
// for Anti Affinity, the weights are: 0, -5, 0, 0
// for Affinity symmetry, the weights are: 0, 0, 8, 0
// for Anti Affinity symmetry, the weights are: 0, 0, 0, -5
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3", Affinity: stayWithS1InRegionAwayFromS2InAz}},
{Spec: v1.PodSpec{NodeName: "machine4", Affinity: awayFromS1InAz}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: 0}},
test: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
interPodAffinity := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
}
list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
}
}
func TestHardPodAffinitySymmetricWeight(t *testing.T) {
podLabelServiceS1 := map[string]string{
"service": "S1",
}
labelRgChina := map[string]string{
"region": "China",
}
labelRgIndia := map[string]string{
"region": "India",
}
labelAzAz1 := map[string]string{
"az": "az1",
}
hardPodAffinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
hardPodAffinityWeight int
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardPodAffinity}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardPodAffinity}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry weights 1 by default, then nodes that match the hard pod affinity symmetry rules, get a high score",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardPodAffinity}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardPodAffinity}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
hardPodAffinityWeight: 0,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry is closed(weights 0), then nodes that match the hard pod affinity symmetry rules, get same score with those not match",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
ipa := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: test.hardPodAffinityWeight,
}
list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,91 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the minimum of the average of the fraction of requested to capacity.
// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
func LeastRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fallback to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}
// The unused capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more unused resources the higher the score is.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
glog.V(10).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
requested, capacity, node)
return 0
}
return ((capacity - requested) * int64(schedulerapi.MaxPriority)) / capacity
}
// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUnusedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
cpuScore, memoryScore,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int((cpuScore + memoryScore) / 2),
}, nil
}

View file

@ -0,0 +1,264 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestLeastRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node1 Score: (2.5 + 5) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: ((6000 - 3000) *10) / 6000 = 5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 0) *10) / 20000 = 10
Node1 Score: (4 + 10) / 2 = 7
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node2 Score: (4 + 7.5) / 2 = 5
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 10000) *10) / 20000 = 5
Node2 Score: (4 + 5) / 2 = 4
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((50000 - 10000) *10) / 50000 = 8
Node2 Score: (4 + 8) / 2 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (0 + 10) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (0 + 5) / 2 = 2
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,43 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// priorityMetadata is a type that is passed as metadata for priority functions
type priorityMetadata struct {
nonZeroRequest *schedulercache.Resource
podTolerations []v1.Toleration
affinity *v1.Affinity
}
// PriorityMetadata is a MetadataProducer. Node info can be nil.
func PriorityMetadata(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
tolerationsPreferNoSchedule := getAllTolerationPreferNoSchedule(pod.Spec.Tolerations)
return &priorityMetadata{
nonZeroRequest: getNonZeroRequests(pod),
podTolerations: tolerationsPreferNoSchedule,
affinity: pod.Spec.Affinity,
}
}

View file

@ -0,0 +1,132 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestPriorityMetadata(t *testing.T) {
nonZeroReqs := &schedulercache.Resource{}
nonZeroReqs.MilliCPU = priorityutil.DefaultMilliCpuRequest
nonZeroReqs.Memory = priorityutil.DefaultMemoryRequest
specifiedReqs := &schedulercache.Resource{}
specifiedReqs.MilliCPU = 200
specifiedReqs.Memory = 2000
tolerations := []v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}
podAffinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
},
}
podWithTolerationsAndAffinity := &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: "image",
ImagePullPolicy: "Always",
},
},
Affinity: podAffinity,
Tolerations: tolerations,
},
}
podWithTolerationsAndRequests := &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: "image",
ImagePullPolicy: "Always",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
},
Tolerations: tolerations,
},
}
tests := []struct {
pod *v1.Pod
test string
expected interface{}
}{
{
pod: nil,
expected: nil,
test: "pod is nil , priorityMetadata is nil",
},
{
pod: podWithTolerationsAndAffinity,
expected: &priorityMetadata{
nonZeroRequest: nonZeroReqs,
podTolerations: tolerations,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with default requests",
},
{
pod: podWithTolerationsAndRequests,
expected: &priorityMetadata{
nonZeroRequest: specifiedReqs,
podTolerations: tolerations,
affinity: nil,
},
test: "Produce a priorityMetadata with specified requests",
},
}
for _, test := range tests {
ptData := PriorityMetadata(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expected, ptData)
}
}
}

View file

@ -0,0 +1,94 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the maximum of the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadatat - fallback to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculatUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUSedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
glog.V(10).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
requested, capacity, node)
return 0
}
return (requested * schedulerapi.MaxPriority) / capacity
}
// Calculate the resource used on a node. 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUsedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
cpuScore, memoryScore,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int((cpuScore + memoryScore) / 2),
}, nil
}

View file

@ -0,0 +1,221 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestMostRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
bigCpuAndMemory := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("4000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3000m"),
v1.ResourceMemory: resource.MustParse("5000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (used resources) on 0-10 scale
CPU Score: (0 * 10 / 4000 = 0
Memory Score: (0 * 10) / 10000 = 0
Node1 Score: (0 + 0) / 2 = 0
Node2 scores (used resources) on 0-10 scale
CPU Score: (0 * 10 / 4000 = 0
Memory Score: (0 * 10 / 10000 = 0
Node2 Score: (0 + 0) / 2 = 0
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (3000 * 10 / 4000 = 7.5
Memory Score: (5000 * 10) / 10000 = 5
Node1 Score: (7.5 + 5) / 2 = 6
Node2 scores on 0-10 scale
CPU Score: (3000 * 10 / 6000 = 5
Memory Score: (5000 * 10 / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (0 * 10) / 20000 = 10
Node1 Score: (6 + 0) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node2 Score: (6 + 2.5) / 2 = 4
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node1 Score: (6 + 2.5) / 2 = 4
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (10000 * 10) / 20000 = 5
Node2 Score: (6 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: 5000 > 4000 return 0
Memory Score: (9000 * 10) / 10000 = 9
Node1 Score: (0 + 9) / 2 = 4
Node2 scores on 0-10 scale
CPU Score: (5000 * 10) / 10000 = 5
Memory Score: 9000 > 8000 return 0
Node2 Score: (5 + 0) / 2 = 2
*/
pod: &v1.Pod{Spec: bigCpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 10000, 8000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 2}},
test: "resources requested with more than the node, pods scheduled with resources",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,103 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// CalculateNodeAffinityPriority prioritizes nodes according to node affinity scheduling preferences
// indicated in PreferredDuringSchedulingIgnoredDuringExecution. Each time a node match a preferredSchedulingTerm,
// it will a get an add of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms
// the node satisfies and the more the preferredSchedulingTerm that is satisfied weights, the higher
// score the node gets.
func CalculateNodeAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var affinity *v1.Affinity
if priorityMeta, ok := meta.(*priorityMetadata); ok {
affinity = priorityMeta.affinity
} else {
// We couldn't parse metadata - fallback to the podspec.
affinity = pod.Spec.Affinity
}
var count int32
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
if preferredSchedulingTerm.Weight == 0 {
continue
}
// TODO: Avoid computing it for all nodes if this becomes a performance problem.
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
if err != nil {
return schedulerapi.HostPriority{}, err
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
count += preferredSchedulingTerm.Weight
}
}
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(count),
}, nil
}
func CalculateNodeAffinityPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
maxCountFloat := float64(maxCount)
var fScore float64
for i := range result {
if maxCount > 0 {
fScore = float64(schedulerapi.MaxPriority) * (float64(result[i].Score) / maxCountFloat)
} else {
fScore = 0
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("%v -> %v: NodeAffinityPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
}
result[i].Score = int(fScore)
}
return nil
}

View file

@ -0,0 +1,179 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodeAffinityPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"key": "value"}
label3 := map[string]string{"az": "az1"}
label4 := map[string]string{"abc": "az11", "def": "az22"}
label5 := map[string]string{"foo": "bar", "key": "value", "az": "az1"}
affinity1 := &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{{
Weight: 2,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
}},
},
}},
},
}
affinity2 := &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
{
Weight: 2,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
{
Weight: 4,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "key",
Operator: v1.NodeSelectorOpIn,
Values: []string{"value"},
},
},
},
},
{
Weight: 5,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
{
Key: "key",
Operator: v1.NodeSelectorOpIn,
Values: []string{"value"},
},
{
Key: "az",
Operator: v1.NodeSelectorOpIn,
Values: []string{"az1"},
},
},
},
},
},
},
}
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{},
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as NodeAffinity is nil",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label4}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "no machine macthes preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "only machine1 matches the preferred scheduling requirements of pod",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity2,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: label5}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine5", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 3}},
test: "all machines matches the preferred scheduling requirements of pod but with different priorities ",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected %#v, \ngot %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type NodeLabelPrioritizer struct {
label string
presence bool
}
func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
labelPrioritizer := &NodeLabelPrioritizer{
label: label,
presence: presence,
}
return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}
// CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
exists := labels.Set(node.Labels).Has(n.label)
score := 0
if (exists && n.presence) || (!exists && !n.presence) {
score = schedulerapi.MaxPriority
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}

View file

@ -0,0 +1,122 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNewNodeLabelPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"bar": "foo"}
label3 := map[string]string{"bar": "baz"}
tests := []struct {
nodes []*v1.Node
label string
presence bool
expectedList schedulerapi.HostPriorityList
test string
}{
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "baz",
presence: true,
test: "no match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "baz",
presence: false,
test: "no match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "foo",
presence: true,
test: "one match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "foo",
presence: false,
test: "one match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "bar",
presence: true,
test: "two matches found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "bar",
presence: false,
test: "two matches found, presence false",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,59 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
controllerRef := priorityutil.GetControllerRef(pod)
if controllerRef != nil {
// Ignore pods that are owned by other controller than ReplicationController
// or ReplicaSet.
if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
controllerRef = nil
}
}
if controllerRef == nil {
return schedulerapi.HostPriority{Host: node.Name, Score: schedulerapi.MaxPriority}, nil
}
avoids, err := v1helper.GetAvoidPodsFromNodeAnnotations(node.Annotations)
if err != nil {
// If we cannot get annotation, assume it's schedulable there.
return schedulerapi.HostPriority{Host: node.Name, Score: schedulerapi.MaxPriority}, nil
}
for i := range avoids.PreferAvoidPods {
avoid := &avoids.PreferAvoidPods[i]
if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
}
}
return schedulerapi.HostPriority{Host: node.Name, Score: schedulerapi.MaxPriority}, nil
}

View file

@ -0,0 +1,156 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodePreferAvoidPriority(t *testing.T) {
annotations1 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicationController",
"name": "foo",
"uid": "abcdef123456",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
annotations2 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicaSet",
"name": "foo",
"uid": "qwert12345",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
testNodes := []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: "machine1", Annotations: annotations1},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "machine2", Annotations: annotations2},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "machine3"},
},
}
trueVar := true
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "ownership by random controller should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "owner without Controller field set should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,269 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"sync"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/util/workqueue"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// When zone information is present, give 2/3 of the weighting to zone spreading, 1/3 to node spreading
// TODO: Any way to justify this weighting?
const zoneWeighting float64 = 2.0 / 3.0
type SelectorSpread struct {
serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
replicaSetLister algorithm.ReplicaSetLister
statefulSetLister algorithm.StatefulSetLister
}
func NewSelectorSpreadPriority(
serviceLister algorithm.ServiceLister,
controllerLister algorithm.ControllerLister,
replicaSetLister algorithm.ReplicaSetLister,
statefulSetLister algorithm.StatefulSetLister) algorithm.PriorityFunction {
selectorSpread := &SelectorSpread{
serviceLister: serviceLister,
controllerLister: controllerLister,
replicaSetLister: replicaSetLister,
statefulSetLister: statefulSetLister,
}
return selectorSpread.CalculateSpreadPriority
}
// Returns selectors of services, RCs and RSs matching the given pod.
func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister, ssl algorithm.StatefulSetLister) []labels.Selector {
var selectors []labels.Selector
if services, err := sl.GetPodServices(pod); err == nil {
for _, service := range services {
selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
}
}
if rcs, err := cl.GetPodControllers(pod); err == nil {
for _, rc := range rcs {
selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector))
}
}
if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
for _, rs := range rss {
if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
selectors = append(selectors, selector)
}
}
}
if sss, err := ssl.GetPodStatefulSets(pod); err == nil {
for _, ss := range sss {
if selector, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil {
selectors = append(selectors, selector)
}
}
}
return selectors
}
func (s *SelectorSpread) getSelectors(pod *v1.Pod) []labels.Selector {
return getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister)
}
// CalculateSpreadPriority spreads pods across hosts and zones, considering pods belonging to the same service or replication controller.
// When a pod is scheduled, it looks for services, RCs or RSs that match the pod, then finds existing pods that match those selectors.
// It favors nodes that have fewer existing matching pods.
// i.e. it pushes the scheduler towards a node where there's the smallest number of
// pods which match the same service, RC or RS selectors as the pod being scheduled.
// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
func (s *SelectorSpread) CalculateSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
selectors := s.getSelectors(pod)
// Count similar pods by node
countsByNodeName := make(map[string]float64, len(nodes))
countsByZone := make(map[string]float64, 10)
maxCountByNodeName := float64(0)
countsByNodeNameLock := sync.Mutex{}
if len(selectors) > 0 {
processNodeFunc := func(i int) {
nodeName := nodes[i].Name
count := float64(0)
for _, nodePod := range nodeNameToInfo[nodeName].Pods() {
if pod.Namespace != nodePod.Namespace {
continue
}
// When we are replacing a failed pod, we often see the previous
// deleted version while scheduling the replacement.
// Ignore the previous deleted version for spreading purposes
// (it can still be considered for resource restrictions etc.)
if nodePod.DeletionTimestamp != nil {
glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
continue
}
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
matches = true
break
}
}
if matches {
count++
}
}
zoneId := utilnode.GetZoneKey(nodes[i])
countsByNodeNameLock.Lock()
defer countsByNodeNameLock.Unlock()
countsByNodeName[nodeName] = count
if count > maxCountByNodeName {
maxCountByNodeName = count
}
if zoneId != "" {
countsByZone[zoneId] += count
}
}
workqueue.Parallelize(16, len(nodes), processNodeFunc)
}
// Aggregate by-zone information
// Compute the maximum number of pods hosted in any zone
haveZones := len(countsByZone) != 0
maxCountByZone := float64(0)
for _, count := range countsByZone {
if count > maxCountByZone {
maxCountByZone = count
}
}
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
//score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for _, node := range nodes {
// initializing to the default/max node score of maxPriority
fScore := float64(schedulerapi.MaxPriority)
if maxCountByNodeName > 0 {
fScore = float64(schedulerapi.MaxPriority) * ((maxCountByNodeName - countsByNodeName[node.Name]) / maxCountByNodeName)
}
// If there is zone information present, incorporate it
if haveZones {
zoneId := utilnode.GetZoneKey(node)
if zoneId != "" {
zoneScore := float64(schedulerapi.MaxPriority) * ((maxCountByZone - countsByZone[zoneId]) / maxCountByZone)
fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
}
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
)
}
}
return result, nil
}
type ServiceAntiAffinity struct {
podLister algorithm.PodLister
serviceLister algorithm.ServiceLister
label string
}
func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, label string) algorithm.PriorityFunction {
antiAffinity := &ServiceAntiAffinity{
podLister: podLister,
serviceLister: serviceLister,
label: label,
}
return antiAffinity.CalculateAntiAffinityPriority
}
// Classifies nodes into ones with labels and without labels.
func (s *ServiceAntiAffinity) getNodeClassificationByLabels(nodes []*v1.Node) (map[string]string, []string) {
labeledNodes := map[string]string{}
nonLabeledNodes := []string{}
for _, node := range nodes {
if labels.Set(node.Labels).Has(s.label) {
label := labels.Set(node.Labels).Get(s.label)
labeledNodes[node.Name] = label
} else {
nonLabeledNodes = append(nonLabeledNodes, node.Name)
}
}
return labeledNodes, nonLabeledNodes
}
// CalculateAntiAffinityPriority spreads pods by minimizing the number of pods belonging to the same service
// on machines with the same value for a particular label.
// The label to be considered is provided to the struct (ServiceAntiAffinity).
func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
var nsServicePods []*v1.Pod
if services, err := s.serviceLister.GetPodServices(pod); err == nil && len(services) > 0 {
// just use the first service and get the other pods within the service
// TODO: a separate predicate can be created that tries to handle all services for the pod
selector := labels.SelectorFromSet(services[0].Spec.Selector)
pods, err := s.podLister.List(selector)
if err != nil {
return nil, err
}
// consider only the pods that belong to the same namespace
for _, nsPod := range pods {
if nsPod.Namespace == pod.Namespace {
nsServicePods = append(nsServicePods, nsPod)
}
}
}
// separate out the nodes that have the label from the ones that don't
labeledNodes, nonLabeledNodes := s.getNodeClassificationByLabels(nodes)
podCounts := map[string]int{}
for _, pod := range nsServicePods {
label, exists := labeledNodes[pod.Spec.NodeName]
if !exists {
continue
}
podCounts[label]++
}
numServicePods := len(nsServicePods)
result := []schedulerapi.HostPriority{}
//score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for node := range labeledNodes {
// initializing to the default/max node score of maxPriority
fScore := float64(schedulerapi.MaxPriority)
if numServicePods > 0 {
fScore = float64(schedulerapi.MaxPriority) * (float64(numServicePods-podCounts[labeledNodes[node]]) / float64(numServicePods))
}
result = append(result, schedulerapi.HostPriority{Host: node, Score: int(fScore)})
}
// add the open nodes with a score of 0
for _, node := range nonLabeledNodes {
result = append(result, schedulerapi.HostPriority{Host: node, Score: 0})
}
return result, nil
}

View file

@ -0,0 +1,780 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
apps "k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing"
)
func controllerRef(kind, name, uid string) []metav1.OwnerReference {
// TODO: When ControllerRef will be implemented uncomment code below.
return nil
//trueVar := true
//return []metav1.OwnerReference{
// {Kind: kind, Name: name, UID: types.UID(uid), Controller: &trueVar},
//}
}
func TestSelectorSpreadPriority(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
zone1Spec := v1.PodSpec{
NodeName: "machine1",
}
zone2Spec := v1.PodSpec{
NodeName: "machine2",
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: new(v1.Pod),
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec}},
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}}},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "two pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in no namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "four pods, one service pod in default namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns2"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: "ns1"}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in specific namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 0}},
test: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
// "baz=blah" matches both labels1 and labels2, and "foo=bar" matches only labels 1. This means that we assume that we want to
// do spreading between all pods. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replication controller",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
// Taken together Service and Replication Controller should match all Pods, hence result should be equal to one above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replication controller should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// Both Nodes have one pod from the given RC, hence both get 0 score.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replica set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "StatefulSet with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another stateful set with partial pod label matches",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil)
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func buildPod(nodeName string, labels map[string]string, ownerRefs []metav1.OwnerReference) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Labels: labels, OwnerReferences: ownerRefs},
Spec: v1.PodSpec{NodeName: nodeName},
}
}
func TestZoneSelectorSpreadPriority(t *testing.T) {
labels1 := map[string]string{
"label1": "l1",
"baz": "blah",
}
labels2 := map[string]string{
"label2": "l2",
"baz": "blah",
}
const nodeMachine1Zone1 = "machine1.zone1"
const nodeMachine1Zone2 = "machine1.zone2"
const nodeMachine2Zone2 = "machine2.zone2"
const nodeMachine1Zone3 = "machine1.zone3"
const nodeMachine2Zone3 = "machine2.zone3"
const nodeMachine3Zone3 = "machine3.zone3"
buildNodeLabels := func(failureDomain string) map[string]string {
labels := map[string]string{
kubeletapis.LabelZoneFailureDomain: failureDomain,
}
return labels
}
labeledNodes := map[string]map[string]string{
nodeMachine1Zone1: buildNodeLabels("zone1"),
nodeMachine1Zone2: buildNodeLabels("zone2"),
nodeMachine2Zone2: buildNodeLabels("zone2"),
nodeMachine1Zone3: buildNodeLabels("zone3"),
nodeMachine2Zone3: buildNodeLabels("zone3"),
nodeMachine3Zone3: buildNodeLabels("zone3"),
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: new(v1.Pod),
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "nothing scheduled",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{buildPod(nodeMachine1Zone1, nil, nil)},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "no services",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{buildPod(nodeMachine1Zone1, labels2, nil)},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "different services",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels2, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: 0}, // Already have pod on machine
{Host: nodeMachine2Zone2, Score: 3}, // Already have pod in zone
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 1 matching (in z2)",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels2, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
buildPod(nodeMachine2Zone2, labels1, nil),
buildPod(nodeMachine1Zone3, labels2, nil),
buildPod(nodeMachine2Zone3, labels1, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: 0}, // Pod on node
{Host: nodeMachine2Zone2, Score: 0}, // Pod on node
{Host: nodeMachine1Zone3, Score: 6}, // Pod in zone
{Host: nodeMachine2Zone3, Score: 3}, // Pod on node
{Host: nodeMachine3Zone3, Score: 6}, // Pod in zone
},
test: "five pods, 3 matching (z2=2, z3=1)",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels1, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
buildPod(nodeMachine2Zone2, labels2, nil),
buildPod(nodeMachine1Zone3, labels1, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: 0}, // Pod on node
{Host: nodeMachine1Zone2, Score: 0}, // Pod on node
{Host: nodeMachine2Zone2, Score: 3}, // Pod in zone
{Host: nodeMachine1Zone3, Score: 0}, // Pod on node
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels1, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
buildPod(nodeMachine1Zone3, labels1, nil),
buildPod(nodeMachine2Zone2, labels2, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: 0}, // Pod on node
{Host: nodeMachine1Zone2, Score: 0}, // Pod on node
{Host: nodeMachine2Zone2, Score: 3}, // Pod in zone
{Host: nodeMachine1Zone3, Score: 0}, // Pod on node
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, controllerRef("ReplicationController", "name", "abc123")),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone3, labels1, controllerRef("ReplicationController", "name", "abc123")),
buildPod(nodeMachine1Zone2, labels1, controllerRef("ReplicationController", "name", "abc123")),
buildPod(nodeMachine1Zone3, labels1, controllerRef("ReplicationController", "name", "abc123")),
},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
// Note that because we put two pods on the same node (nodeMachine1Zone3),
// the values here are questionable for zone2, in particular for nodeMachine1Zone2.
// However they kind of make sense; zone1 is still most-highly favored.
// zone3 is in general least favored, and m1.z3 particularly low priority.
// We would probably prefer to see a bigger gap between putting a second
// pod on m1.z2 and putting a pod on m2.z2, but the ordering is correct.
// This is also consistent with what we have already.
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority}, // No pods in zone
{Host: nodeMachine1Zone2, Score: 5}, // Pod on node
{Host: nodeMachine2Zone2, Score: 6}, // Pod in zone
{Host: nodeMachine1Zone3, Score: 0}, // Two pods on node
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "Replication controller spreading (z1=0, z2=1, z3=2)",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil)
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func TestZoneSpreadPriority(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
zone1 := map[string]string{
"zone": "zone1",
}
zone2 := map[string]string{
"zone": "zone2",
}
nozone := map[string]string{
"name": "value",
}
zone0Spec := v1.PodSpec{
NodeName: "machine01",
}
zone1Spec := v1.PodSpec{
NodeName: "machine11",
}
zone2Spec := v1.PodSpec{
NodeName: "machine21",
}
labeledNodes := map[string]map[string]string{
"machine01": nozone, "machine02": nozone,
"machine11": zone1, "machine12": zone1,
"machine21": zone2, "machine22": zone2,
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes map[string]map[string]string
services []*v1.Service
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: new(v1.Pod),
nodes: labeledNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec}},
nodes: labeledNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}}},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone0Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: 0}, {Host: "machine22", Score: 0},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 5}, {Host: "machine12", Score: 5},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 0}, {Host: "machine12", Score: 0},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three service label match pods in different namespaces",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 6}, {Host: "machine12", Score: 6},
{Host: "machine21", Score: 3}, {Host: "machine22", Score: 3},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 3}, {Host: "machine12", Score: 3},
{Host: "machine21", Score: 6}, {Host: "machine22", Score: 6},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone0Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 7}, {Host: "machine12", Score: 7},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service pod on non-zoned node",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil)
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
list, err := zoneSpread.CalculateAntiAffinityPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func TestGetNodeClassificationByLabels(t *testing.T) {
const machine01 = "machine01"
const machine02 = "machine02"
const zoneA = "zoneA"
zone1 := map[string]string{
"zone": zoneA,
}
labeledNodes := map[string]map[string]string{
machine01: zone1,
}
expectedNonLabeledNodes := []string{machine02}
serviceAffinity := ServiceAntiAffinity{label: "zone"}
newLabeledNodes, noNonLabeledNodes := serviceAffinity.getNodeClassificationByLabels(makeLabeledNodeList(labeledNodes))
noLabeledNodes, newnonLabeledNodes := serviceAffinity.getNodeClassificationByLabels(makeNodeList(expectedNonLabeledNodes))
label, _ := newLabeledNodes[machine01]
if label != zoneA && len(noNonLabeledNodes) != 0 {
t.Errorf("Expected only labeled node with label zoneA and no noNonLabeledNodes")
}
if len(noLabeledNodes) != 0 && newnonLabeledNodes[0] != machine02 {
t.Errorf("Expected only non labled nodes")
}
}
func makeLabeledNodeList(nodeMap map[string]map[string]string) []*v1.Node {
nodes := make([]*v1.Node, 0, len(nodeMap))
for nodeName, labels := range nodeMap {
nodes = append(nodes, &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName, Labels: labels}})
}
return nodes
}
func makeNodeList(nodeNames []string) []*v1.Node {
nodes := make([]*v1.Node, 0, len(nodeNames))
for _, nodeName := range nodeNames {
nodes = append(nodes, &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}})
}
return nodes
}

View file

@ -0,0 +1,100 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// CountIntolerableTaintsPreferNoSchedule gives the count of intolerable taints of a pod with effect PreferNoSchedule
func countIntolerableTaintsPreferNoSchedule(taints []v1.Taint, tolerations []v1.Toleration) (intolerableTaints int) {
for _, taint := range taints {
// check only on taints that have effect PreferNoSchedule
if taint.Effect != v1.TaintEffectPreferNoSchedule {
continue
}
if !v1helper.TolerationsTolerateTaint(tolerations, &taint) {
intolerableTaints++
}
}
return
}
// getAllTolerationEffectPreferNoSchedule gets the list of all Tolerations with Effect PreferNoSchedule or with no effect.
func getAllTolerationPreferNoSchedule(tolerations []v1.Toleration) (tolerationList []v1.Toleration) {
for _, toleration := range tolerations {
// Empty effect means all effects which includes PreferNoSchedule, so we need to collect it as well.
if len(toleration.Effect) == 0 || toleration.Effect == v1.TaintEffectPreferNoSchedule {
tolerationList = append(tolerationList, toleration)
}
}
return
}
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on the number of intolerable taints on the node
func ComputeTaintTolerationPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
// To hold all the tolerations with Effect PreferNoSchedule
var tolerationsPreferNoSchedule []v1.Toleration
if priorityMeta, ok := meta.(*priorityMetadata); ok {
tolerationsPreferNoSchedule = priorityMeta.podTolerations
} else {
tolerationsPreferNoSchedule = getAllTolerationPreferNoSchedule(pod.Spec.Tolerations)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: countIntolerableTaintsPreferNoSchedule(node.Spec.Taints, tolerationsPreferNoSchedule),
}, nil
}
// ComputeTaintTolerationPriorityReduce calculates the source of each node based on the number of intolerable taints on the node
func ComputeTaintTolerationPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
maxCountFloat := float64(maxCount)
for i := range result {
fScore := float64(schedulerapi.MaxPriority)
if maxCountFloat > 0 {
fScore = (1.0 - float64(result[i].Score)/maxCountFloat) * float64(schedulerapi.MaxPriority)
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("%v -> %v: Taint Toleration Priority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
}
result[i].Score = int(fScore)
}
return nil
}

View file

@ -0,0 +1,241 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func nodeWithTaints(nodeName string, taints []v1.Taint) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
},
Spec: v1.NodeSpec{
Taints: taints,
},
}
}
func podWithTolerations(tolerations []v1.Toleration) *v1.Pod {
return &v1.Pod{
Spec: v1.PodSpec{
Tolerations: tolerations,
},
}
}
// This function will create a set of nodes and pods and test the priority
// Nodes with zero,one,two,three,four and hundred taints are created
// Pods with zero,one,two,three,four and hundred tolerations are created
func TestTaintAndToleration(t *testing.T) {
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
// basic test case
{
test: "node with taints tolerated by the pod, gets a higher score than those node with intolerable taints",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodeWithTaints("nodeB", []v1.Taint{{
Key: "foo",
Value: "blah",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: 0},
},
},
// the count of taints that are tolerated by pod, does not matter.
{
test: "the nodes that all of their taints are tolerated by the pod, get the same score, no matter how many tolerable taints a node has",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: schedulerapi.MaxPriority},
{Host: "nodeC", Score: schedulerapi.MaxPriority},
},
},
// the count of taints on a node that are not tolerated by pod, matters.
{
test: "the more intolerable taints a node has, the lower score it gets.",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: 5},
{Host: "nodeC", Score: 0},
},
},
// taints-tolerations priority only takes care about the taints and tolerations that have effect PreferNoSchedule
{
test: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: schedulerapi.MaxPriority},
{Host: "nodeC", Score: 0},
},
},
{
test: "Default behaviour No taints and tolerations, lands on node with no taints",
//pod without tolerations
pod: podWithTolerations([]v1.Toleration{}),
nodes: []*v1.Node{
//Node without taints
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: 0},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("%s, unexpected error: %v", test.test, err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s,\nexpected:\n\t%+v,\ngot:\n\t%+v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,61 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
return func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for i := range nodes {
hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
if err != nil {
return nil, err
}
result = append(result, hostResult)
}
if reduceFn != nil {
if err := reduceFn(pod, nil, nodeNameToInfo, result); err != nil {
return nil, err
}
}
return result, nil
}
}

View file

@ -0,0 +1,45 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = ["topologies_test.go"],
library = ":go_default_library",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"non_zero.go",
"topologies.go",
"util.go",
],
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View file

@ -0,0 +1,50 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/api/core/v1"
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
// of computing priority only. This ensures that when scheduling zero-request pods, such
// pods will not all be scheduled to the machine with the smallest in-use request,
// and that when scheduling regular pods, such pods will not see zero-request pods as
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
const DefaultMilliCpuRequest int64 = 100 // 0.1 core
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default resource request if none is found or what is provided on the request
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity v1.ResourceList"
// as an additional argument here) rather than using constants
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
outMilliCPU = DefaultMilliCpuRequest
} else {
outMilliCPU = requests.Cpu().MilliValue()
}
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
outMemory = DefaultMemoryRequest
} else {
outMemory = requests.Memory().Value()
}
return outMilliCPU, outMemory
}

View file

@ -0,0 +1,80 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
)
// GetNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.String {
names := sets.String{}
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}
if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key.
// Returns false if topologyKey is empty.
func NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
if len(topologyKey) == 0 {
return false
}
if nodeA.Labels == nil || nodeB.Labels == nil {
return false
}
nodeALabel, okA := nodeA.Labels[topologyKey]
nodeBLabel, okB := nodeB.Labels[topologyKey]
// If found label in both nodes, check the label
if okB && okA {
return nodeALabel == nodeBLabel
}
return false
}
type Topologies struct {
DefaultKeys []string
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key.
func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
return NodesHaveSameTopologyKey(nodeA, nodeB, topologyKey)
}

View file

@ -0,0 +1,124 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func TestNodesHaveSameTopologyKey(t *testing.T) {
tests := []struct {
name string
nodeA, nodeB *v1.Node
topologyKey string
expected bool
}{
{
name: "nodeA{'a':'a'} vs. empty label in nodeB",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
nodeB: &v1.Node{},
expected: false,
topologyKey: "a",
},
{
name: "nodeA{'a':'a'} vs. nodeB{'a':'a'}",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
expected: true,
topologyKey: "a",
},
{
name: "nodeA{'a':''} vs. empty label in nodeB",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
nodeB: &v1.Node{},
expected: false,
topologyKey: "a",
},
{
name: "nodeA{'a':''} vs. nodeB{'a':''}",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
expected: true,
topologyKey: "a",
},
{
name: "nodeA{'a':'a'} vs. nodeB{'a':'a'} by key{'b'}",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
expected: false,
topologyKey: "b",
},
}
for _, test := range tests {
got := NodesHaveSameTopologyKey(test.nodeA, test.nodeB, test.topologyKey)
if test.expected != got {
t.Errorf("%v: expected %t, got %t", test.name, test.expected, got)
}
}
}

View file

@ -0,0 +1,35 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func GetControllerRef(pod *v1.Pod) *metav1.OwnerReference {
if len(pod.OwnerReferences) == 0 {
return nil
}
for i := range pod.OwnerReferences {
ref := &pod.OwnerReferences[i]
if ref.Controller != nil && *ref.Controller {
return ref
}
}
return nil
}

View file

@ -0,0 +1,60 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// SchedulerExtender is an interface for external processes to influence scheduling
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
// Bind delegates the action of binding a pod to a node to the extender.
Bind(binding *v1.Binding) error
// IsBinder returns whether this extender is configured for the Bind method.
IsBinder() bool
}
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods
// onto machines.
type ScheduleAlgorithm interface {
Schedule(*v1.Pod, NodeLister) (selectedMachine string, err error)
// Preempt receives scheduling errors for a pod and tries to create room for
// the pod by preempting lower priority pods if possible.
// It returns the node where preemption happened, a list of preempted pods, and error if any.
Preempt(*v1.Pod, NodeLister, error) (selectedNode *v1.Node, preemptedPods []*v1.Pod, err error)
// Predicates() returns a pointer to a map of predicate functions. This is
// exposed for testing.
Predicates() map[string]FitPredicate
// Prioritizers returns a slice of priority config. This is exposed for
// testing.
Prioritizers() []PriorityConfig
}

View file

@ -0,0 +1,60 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/api/core/v1"
)
// Some functions used by multiple scheduler tests.
type schedulerTester struct {
t *testing.T
scheduler ScheduleAlgorithm
nodeLister NodeLister
}
// Call if you know exactly where pod should get scheduled.
func (st *schedulerTester) expectSchedule(pod *v1.Pod, expected string) {
actual, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
if actual != expected {
st.t.Errorf("Unexpected scheduling value: %v, expected %v", actual, expected)
}
}
// Call if you can't predict where pod will be scheduled.
func (st *schedulerTester) expectSuccess(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
}
// Call if pod should *not* schedule.
func (st *schedulerTester) expectFailure(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err == nil {
st.t.Error("Unexpected non-error")
}
}

View file

@ -0,0 +1,164 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
apps "k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
type FitPredicate func(pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
// PriorityMapFunction is a function that computes per-node results for a given node.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityMapFunction func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
// PriorityReduceFunction is a function that aggregated per-node results and computes
// final scores for all nodes.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityReduceFunction func(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error
// PredicateMetadataProducer is a function that computes predicate metadata for a given pod.
type PredicateMetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) PredicateMetadata
// MetadataProducer is a function that computes metadata for a given pod. This
// is now used for only for priority functions. For predicates please use PredicateMetadataProducer.
// TODO: Rename this once we have a specific type for priority metadata producer.
type MetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{}
// DEPRECATED
// Use Map-Reduce pattern for priority functions.
type PriorityFunction func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error)
type PriorityConfig struct {
Map PriorityMapFunction
Reduce PriorityReduceFunction
// TODO: Remove it after migrating all functions to
// Map-Reduce pattern.
Function PriorityFunction
Weight int
}
// EmptyPredicateMetadataProducer returns a no-op MetadataProducer type.
func EmptyPredicateMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) PredicateMetadata {
return nil
}
// EmptyMetadataProducer returns a no-op MetadataProducer type.
func EmptyMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
return nil
}
type PredicateFailureReason interface {
GetReason() string
}
type GetEquivalencePodFunc func(pod *v1.Pod) interface{}
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
// performing expensive copies that are unneeded.
List() ([]*v1.Node, error)
}
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
// performing expensive copies that are unneeded.
List(labels.Selector) ([]*v1.Pod, error)
// This is similar to "List()", but the returned slice does not
// contain pods that don't pass `podFilter`.
FilteredList(podFilter schedulercache.PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
type ServiceLister interface {
// Lists all the services
List(labels.Selector) ([]*v1.Service, error)
// Gets the services for the given pod
GetPodServices(*v1.Pod) ([]*v1.Service, error)
}
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// Lists all the replication controllers
List(labels.Selector) ([]*v1.ReplicationController, error)
// Gets the services for the given pod
GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
}
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*extensions.ReplicaSet, error)
}
var _ ControllerLister = &EmptyControllerLister{}
// EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
type EmptyControllerLister struct{}
// List returns nil
func (f EmptyControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return nil, nil
}
// GetPodControllers returns nil
func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
return nil, nil
}
var _ ReplicaSetLister = &EmptyReplicaSetLister{}
// EmptyReplicaSetLister implements ReplicaSetLister on []extensions.ReplicaSet returning empty data
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
return nil, nil
}
// StatefulSetLister interface represents anything that can produce a list of StatefulSet; the list is consumed by a scheduler.
type StatefulSetLister interface {
// Gets the StatefulSet for the given pod.
GetPodStatefulSets(*v1.Pod) ([]*apps.StatefulSet, error)
}
var _ StatefulSetLister = &EmptyStatefulSetLister{}
// EmptyStatefulSetLister implements StatefulSetLister on []apps.StatefulSet returning empty data.
type EmptyStatefulSetLister struct{}
// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*apps.StatefulSet, err error) {
return nil, nil
}
type PredicateMetadata interface {
ShallowCopy() PredicateMetadata
AddPod(addedPod *v1.Pod, nodeInfo *schedulercache.NodeInfo) error
RemovePod(deletedPod *v1.Pod) error
}

View file

@ -0,0 +1,65 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// EmptyMetadataProducer should returns a no-op MetadataProducer type.
func TestEmptyMetadataProducer(t *testing.T) {
fakePod := new(v1.Pod)
fakeLabelSelector := labels.SelectorFromSet(labels.Set{"foo": "bar"})
nodeNameToInfo := map[string]*schedulercache.NodeInfo{
"2": schedulercache.NewNodeInfo(fakePod),
"1": schedulercache.NewNodeInfo(),
}
// Test EmptyMetadataProducer
metadata := EmptyMetadataProducer(fakePod, nodeNameToInfo)
if metadata != nil {
t.Errorf("failed to produce empty metadata: got %v, expected nil", metadata)
}
// Test EmptyControllerLister should return nill
controllerLister := EmptyControllerLister{}
nilController, nilError := controllerLister.List(fakeLabelSelector)
if nilController != nil || nilError != nil {
t.Errorf("failed to produce empty controller lister: got %v, expected nil", nilController)
}
// Test GetPodControllers on empty controller lister should return nill
nilController, nilError = controllerLister.GetPodControllers(fakePod)
if nilController != nil || nilError != nil {
t.Errorf("failed to produce empty controller lister: got %v, expected nil", nilController)
}
// Test GetPodReplicaSets on empty replica sets should return nill
replicaSetLister := EmptyReplicaSetLister{}
nilRss, nilErrRss := replicaSetLister.GetPodReplicaSets(fakePod)
if nilRss != nil || nilErrRss != nil {
t.Errorf("failed to produce empty replicaSetLister: got %v, expected nil", nilRss)
}
// Test GetPodStatefulSets on empty replica sets should return nill
statefulSetLister := EmptyStatefulSetLister{}
nilSSL, nilErrSSL := statefulSetLister.GetPodStatefulSets(fakePod)
if nilSSL != nil || nilErrSSL != nil {
t.Errorf("failed to produce empty statefulSetLister: got %v, expected nil", nilSSL)
}
}

View file

@ -0,0 +1,56 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
const (
// When feature-gate for TaintBasedEvictions=true flag is enabled,
// TaintNodeNotReady would be automatically added by node controller
// when node is not ready, and removed when node becomes ready.
TaintNodeNotReady = "node.alpha.kubernetes.io/notReady"
// When feature-gate for TaintBasedEvictions=true flag is enabled,
// TaintNodeUnreachable would be automatically added by node controller
// when node becomes unreachable (corresponding to NodeReady status ConditionUnknown)
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.alpha.kubernetes.io/unreachable"
// When feature-gate for TaintBasedEvictions=true flag is enabled,
// TaintNodeOutOfDisk would be automatically added by node controller
// when node becomes out of disk, and removed when node has enough disk.
TaintNodeOutOfDisk = "node.kubernetes.io/out-of-disk"
// When feature-gate for TaintBasedEvictions=true flag is enabled,
// TaintNodeMemoryPressure would be automatically added by node controller
// when node has memory pressure, and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"
// When feature-gate for TaintBasedEvictions=true flag is enabled,
// TaintNodeDiskPressure would be automatically added by node controller
// when node has disk pressure, and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"
// When feature-gate for TaintBasedEvictions=true flag is enabled,
// TaintNodeNetworkUnavailable would be automatically added by node controller
// when node's network is unavailable, and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"
// When kubelet is started with the "external" cloud provider, then
// it sets this taint on a node to mark it as unusable, until a controller
// from the cloud-controller-manager intitializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
)