-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7752 from medyagh/node_pressure_redo
Check node pressure & new option "node_ready" for --wait flag
- Loading branch information
Showing
9 changed files
with
328 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
142 changes: 142 additions & 0 deletions
142
pkg/minikube/bootstrapper/bsutil/kverify/node_conditions.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
/* | ||
Copyright 2020 The Kubernetes Authors All rights reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// Package kverify verifies a running kubernetes cluster is healthy | ||
package kverify | ||
|
||
import ( | ||
"fmt" | ||
"time" | ||
|
||
"github.com/golang/glog" | ||
"github.com/pkg/errors" | ||
v1 "k8s.io/api/core/v1" | ||
meta "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/client-go/kubernetes" | ||
) | ||
|
||
// NodeCondition represents a favorable or unfavorable node condition. | ||
type NodeCondition struct { | ||
Type v1.NodeConditionType | ||
Status v1.ConditionStatus | ||
Reason string | ||
Message string | ||
} | ||
|
||
// DiskPressure detects if the condition is disk pressure | ||
func (pc *NodeCondition) DiskPressure() bool { | ||
return pc.Type == v1.NodeDiskPressure && pc.Status == v1.ConditionTrue | ||
} | ||
|
||
// MemoryPressure detects if the condition is memory pressure | ||
func (pc *NodeCondition) MemoryPressure() bool { | ||
return pc.Type == v1.NodeMemoryPressure && pc.Status == v1.ConditionTrue | ||
} | ||
|
||
// PIDPressure detects if the condition is PID pressure | ||
func (pc *NodeCondition) PIDPressure() bool { | ||
return pc.Type == v1.NodePIDPressure && pc.Status == v1.ConditionTrue | ||
} | ||
|
||
// NetworkUnavailable detects if the condition is PID pressure | ||
func (pc *NodeCondition) NetworkUnavailable() bool { | ||
return pc.Type == v1.NodeNetworkUnavailable && pc.Status == v1.ConditionTrue | ||
} | ||
|
||
// errTextFormat is the message template shared by the node-condition error
// types in this file. Its three verbs receive, in order, the condition type,
// the reason and the message.
const errTextFormat = "node has unwanted condition %q : Reason %q Message: %q"
|
||
// ErrMemoryPressure is thrown when there is node memory pressure condition | ||
type ErrMemoryPressure struct { | ||
NodeCondition | ||
} | ||
|
||
func (e *ErrMemoryPressure) Error() string { | ||
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message) | ||
} | ||
|
||
// ErrDiskPressure is thrown when there is node disk pressure condition | ||
type ErrDiskPressure struct { | ||
NodeCondition | ||
} | ||
|
||
func (e *ErrDiskPressure) Error() string { | ||
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message) | ||
} | ||
|
||
// ErrPIDPressure is thrown when there is node PID pressure condition | ||
type ErrPIDPressure struct { | ||
NodeCondition | ||
} | ||
|
||
func (e *ErrPIDPressure) Error() string { | ||
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message) | ||
} | ||
|
||
// ErrNetworkNotReady is thrown when there is node condition is network not ready | ||
type ErrNetworkNotReady struct { | ||
NodeCondition | ||
} | ||
|
||
func (e *ErrNetworkNotReady) Error() string { | ||
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message) | ||
} | ||
|
||
// NodePressure verfies that node is not under disk, memory, pid or network pressure. | ||
func NodePressure(cs *kubernetes.Clientset) error { | ||
glog.Info("verifying NodePressure condition ...") | ||
start := time.Now() | ||
defer func() { | ||
glog.Infof("duration metric: took %s to run NodePressure ...", time.Since(start)) | ||
}() | ||
|
||
ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{}) | ||
if err != nil { | ||
return errors.Wrap(err, "list nodes") | ||
} | ||
|
||
for _, n := range ns.Items { | ||
glog.Infof("node storage ephemeral capacity is %s", n.Status.Capacity.StorageEphemeral()) | ||
glog.Infof("node cpu capacity is %s", n.Status.Capacity.Cpu().AsDec()) | ||
for _, c := range n.Status.Conditions { | ||
pc := NodeCondition{Type: c.Type, Status: c.Status, Reason: c.Reason, Message: c.Message} | ||
if pc.DiskPressure() { | ||
return &ErrDiskPressure{ | ||
NodeCondition: pc, | ||
} | ||
} | ||
|
||
if pc.MemoryPressure() { | ||
return &ErrMemoryPressure{ | ||
NodeCondition: pc, | ||
} | ||
} | ||
|
||
if pc.PIDPressure() { | ||
return &ErrPIDPressure{ | ||
NodeCondition: pc, | ||
} | ||
} | ||
|
||
if pc.NetworkUnavailable() { | ||
return &ErrNetworkNotReady{ | ||
NodeCondition: pc, | ||
} | ||
} | ||
|
||
} | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
Copyright 2020 The Kubernetes Authors All rights reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// Package kverify verifies a running kubernetes cluster is healthy | ||
package kverify | ||
|
||
import ( | ||
"fmt" | ||
"time" | ||
|
||
"github.com/golang/glog" | ||
"github.com/pkg/errors" | ||
v1 "k8s.io/api/core/v1" | ||
meta "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/client-go/kubernetes" | ||
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" | ||
) | ||
|
||
// WaitForNodeReady waits till kube client reports node status as "ready" | ||
func WaitForNodeReady(cs *kubernetes.Clientset, timeout time.Duration) error { | ||
glog.Info("waiting for node status to be ready ...") | ||
start := time.Now() | ||
defer func() { | ||
glog.Infof("duration metric: took %s to wait for WaitForNodeReady...", time.Since(start)) | ||
}() | ||
checkReady := func() (bool, error) { | ||
if time.Since(start) > timeout { | ||
return false, fmt.Errorf("wait for node to be ready timed out") | ||
} | ||
ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{}) | ||
if err != nil { | ||
glog.Infof("error listing nodes will retry: %v", err) | ||
return false, nil | ||
} | ||
|
||
for _, n := range ns.Items { | ||
for _, c := range n.Status.Conditions { | ||
if c.Type == v1.NodeReady && c.Status != v1.ConditionTrue { | ||
glog.Infof("node %q has unwanted condition %q : Reason %q Message: %q. will try. ", n.Name, c.Type, c.Reason, c.Message) | ||
return false, nil | ||
} | ||
} | ||
} | ||
return true, nil | ||
} | ||
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { | ||
return errors.Wrapf(err, "wait node ready") | ||
} | ||
return nil | ||
} |
Oops, something went wrong.