Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/extended/node/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This directory contains OpenShift end-to-end tests for node-related features.
- **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling
- **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides
- **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format
- **node_e2e/node.go** - PodDisruptionBudget drain blocking (OCP-67564) - Tests that node drain is blocked when PDB has minAvailable=100% with empty selector [Disruptive] [Lifecycle:informing]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's maintainable to list all test cases here.
Probably we should group them to some extent.
This can be followed up though.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has been organic so far. I think it has reached a point where it does need to be more structured. Definitely a followup item.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bitoku @cpmeadors I agree this needs better structure. Here's a proposal for the README:
Instead of listing every test individually, we can group node_e2e/node.go tests by feature area:
node_e2e/node.go - General node/kubelet tests

Kubelet Configuration:

  • [OTP] validate KUBELET_LOG_LEVEL
  • [OTP] validate cgroupv2 is default [OCP-80983]

CRI-O:

  • [OTP] Allow dev fuse by default [OCP-70987]

Pod Disruption:

  • [OTP][Disruptive] PodDisruptionBudget drain blocking [OCP-67564]

...and so on for other categories.
I can submit this as a follow-up PR once these migration PRs merge. Thoughts?


## Directory Structure

Expand Down
154 changes: 154 additions & 0 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,23 @@ import (

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"

configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/origin/test/extended/imagepolicy"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
utilrand "k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/utils/ptr"

nodeutils "github.com/openshift/origin/test/extended/node"
exutil "github.com/openshift/origin/test/extended/util"
"github.com/openshift/origin/test/extended/util/operator"
)

var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", func() {
Expand Down Expand Up @@ -164,6 +171,153 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
e2e.Logf("/dev/fuse mount output: %s", output)
Comment thread
BhargaviGudi marked this conversation as resolved.
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})

// author: bgudi@redhat.com
//
// OCP-67564: verifies that `oc adm drain` is blocked by a PodDisruptionBudget
// whose minAvailable is 100% and whose selector is empty.
//
// Flow:
//  1. Skip on SingleReplica/External control-plane topologies.
//  2. Create a 6-replica deployment and wait until every replica is ready.
//  3. Create the PDB and confirm its DisruptionAllowed condition is False.
//  4. Pick a ready worker that actually hosts pods from the test namespace.
//  5. Drain that worker with a 30s timeout and expect the drain to fail with
//     the PDB violation messages.
//  6. Confirm the same pods are still on the node.
//
// Cleanup (registered via DeferCleanup, executed in LIFO order) uncordons the
// node, waits for cluster operators to settle, and deletes the PDB and the
// deployment.
g.It("[OTP] Node's drain should block when PodDisruptionBudget minAvailable equals 100 percentage and selector is empty [Disruptive] [OCP-67564]", ote.Informing(), func() {
	const (
		deploymentName = "hello-openshift"
		pdbName        = "my-pdb"
	)
	ctx := context.Background()

	// Skip on SNO/External topologies where there might not be dedicated worker nodes
	infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get cluster infrastructure")
	switch infra.Status.ControlPlaneTopology {
	case configv1.SingleReplicaTopologyMode, configv1.ExternalTopologyMode:
		g.Skip("Skipping on SNO/External topology - requires dedicated worker nodes")
	}

	oc.SetupProject()
	namespace := oc.Namespace()

	// podsOnNode returns the space-separated names of the pods in the test
	// namespace whose spec.nodeName equals the given node.
	podsOnNode := func(node string) (string, error) {
		return oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-n", namespace, "-o=jsonpath={.items[?(@.spec.nodeName=='"+node+"')].metadata.name}").Output()
	}

	g.By("Create a deployment with 6 replicas")
	replicas := int32(6)
	appLabels := map[string]string{"app": "myapp"}
	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      deploymentName,
			Namespace: namespace,
			Labels:    appLabels,
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: &replicas,
			Selector: &metav1.LabelSelector{MatchLabels: appLabels},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Name:   "myapp",
					Labels: appLabels,
				},
				Spec: corev1.PodSpec{
					// Restricted-profile friendly security settings.
					SecurityContext: &corev1.PodSecurityContext{
						RunAsNonRoot: ptr.To(true),
						SeccompProfile: &corev1.SeccompProfile{
							Type: corev1.SeccompProfileTypeRuntimeDefault,
						},
					},
					Containers: []corev1.Container{
						{
							Name:  "myapp",
							Image: "quay.io/openshifttest/hello-openshift@sha256:4200f438cf2e9446f6bcff9d67ceea1f69ed07a2f83363b7fb52529f7ddd8a83",
							SecurityContext: &corev1.SecurityContext{
								AllowPrivilegeEscalation: ptr.To(false),
								Capabilities: &corev1.Capabilities{
									Drop: []corev1.Capability{"ALL"},
								},
							},
						},
					},
				},
			},
		},
	}
	_, err = oc.KubeClient().AppsV1().Deployments(namespace).Create(ctx, deployment, metav1.CreateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to create deployment")
	g.DeferCleanup(oc.KubeClient().AppsV1().Deployments(namespace).Delete, ctx, deploymentName, metav1.DeleteOptions{})

	g.By("Wait for deployment to be ready")
	err = wait.PollUntilContextTimeout(ctx, 3*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
		deploy, pollErr := oc.KubeClient().AppsV1().Deployments(namespace).Get(ctx, deploymentName, metav1.GetOptions{})
		if pollErr != nil {
			// Treat API errors as transient and keep polling until the timeout.
			e2e.Logf("Error getting deployment: %v", pollErr)
			return false, nil
		}
		if deploy.Status.ReadyReplicas == replicas {
			e2e.Logf("Deployment is ready with %d replicas", deploy.Status.ReadyReplicas)
			return true, nil
		}
		e2e.Logf("Waiting for deployment, ready replicas: %d/%d", deploy.Status.ReadyReplicas, replicas)
		return false, nil
	})
	o.Expect(err).NotTo(o.HaveOccurred(), "deployment did not become ready")

	g.By("Create PodDisruptionBudget with 100% minAvailable")
	pdb := &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{
			Name:      pdbName,
			Namespace: namespace,
		},
		Spec: policyv1.PodDisruptionBudgetSpec{
			MinAvailable: ptr.To(intstr.FromString("100%")),
			// Deliberately empty selector: this is the case under test.
			Selector: &metav1.LabelSelector{},
		},
	}
	_, err = oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Create(ctx, pdb, metav1.CreateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to create PodDisruptionBudget")
	g.DeferCleanup(oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Delete, ctx, pdbName, metav1.DeleteOptions{})

	g.By("Get a single worker node running pods from the test namespace")
	workers, err := exutil.GetReadySchedulableWorkerNodes(ctx, oc.AdminKubeClient())
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get worker nodes")
	o.Expect(workers).NotTo(o.BeEmpty(), "no ready schedulable worker nodes found")

	// Do not assume the first worker hosts a replica: pick the first ready
	// worker that actually runs at least one pod from the test namespace.
	var workerNode, podsInWorker string
	for i := range workers {
		pods, listErr := podsOnNode(workers[i].Name)
		o.Expect(listErr).NotTo(o.HaveOccurred(), "failed to get pods on worker node")
		if len(strings.Fields(pods)) > 0 {
			workerNode, podsInWorker = workers[i].Name, pods
			break
		}
	}
	o.Expect(workerNode).NotTo(o.BeEmpty(), "no pods found on worker node")
	e2e.Logf("Selected worker node: %s", workerNode)

	g.By("Make sure that PDB's DisruptionAllowed condition is False")
	var pdbStatus string
	err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 30*time.Second, true, func(context.Context) (bool, error) {
		var pollErr error
		pdbStatus, pollErr = oc.AsAdmin().WithoutNamespace().Run("get").Args("poddisruptionbudget", pdbName, "-n", namespace, "-o=jsonpath={.status.conditions[?(@.type==\"DisruptionAllowed\")].status}").Output()
		if pollErr != nil {
			e2e.Logf("Error getting PDB status: %v", pollErr)
			return false, nil
		}
		if pdbStatus != "" {
			return true, nil
		}
		// The condition is not populated yet; keep polling until it appears.
		e2e.Logf("Waiting for PDB DisruptionAllowed condition to appear")
		return false, nil
	})
	o.Expect(err).NotTo(o.HaveOccurred(), "PDB DisruptionAllowed condition not found")
	o.Expect(pdbStatus).Should(o.Equal("False"), "PDB DisruptionAllowed should be False")

	g.By("Drain the selected worker node")
	// Cleanups run in LIFO order: the node is uncordoned first, then we wait
	// for the cluster operators to settle.
	g.DeferCleanup(func() {
		err := operator.WaitForOperatorsToSettle(ctx, oc.AdminConfigClient(), 10)
		o.Expect(err).NotTo(o.HaveOccurred(), "cluster operators failed to return to available state after node drain")
	})
	g.DeferCleanup(oc.AsAdmin().WithoutNamespace().Run("adm").Args("uncordon", workerNode).Execute)

	out, err := oc.AsAdmin().WithoutNamespace().Run("adm").Args("drain", workerNode, "--ignore-daemonsets", "--delete-emptydir-data", "--timeout=30s").Output()
	o.Expect(err).To(o.HaveOccurred(), "drain operation should have been blocked but it wasn't")
	// ContainSubstring prints the actual drain output on failure, unlike a bare boolean check.
	o.Expect(out).To(o.ContainSubstring("Cannot evict pod as it would violate the pod's disruption budget"), "drain output missing PDB violation error message")
	o.Expect(out).To(o.ContainSubstring("There are pending nodes to be drained"), "drain output missing pending nodes error message")

	g.By("Verify that the pods were not drained from the node")
	podsAfterDrain, err := podsOnNode(workerNode)
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods after drain attempt")
	o.Expect(podsAfterDrain).To(o.Equal(podsInWorker), "pods should not have been evicted from the node")
})
})

// author: asahay@redhat.com
Expand Down