diff --git a/CLAUDE.md b/CLAUDE.md index 0ef47beb7..71d7ee7aa 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -198,7 +198,7 @@ Pod **Permissions** is the differentiator — frames the SA's grant as blast rad ### MCP Server -Stateless HTTP at `/mcp` (JSON-RPC). Read tools use `readOnlyHint`, write tools use `destructiveHint: false`. Respects cluster RBAC (impersonates via `DynamicClientFromContext` for write/exec/logs). Enabled by default; `--no-mcp` to disable. Tool catalogue + design rationale lives in `internal/mcp/tools.go` + [docs/mcp.md](docs/mcp.md) — don't restate it here. +Stateless HTTP at `/mcp` (JSON-RPC). Read tools use `readOnlyHint`, write tools use `destructiveHint: true`. Respects cluster RBAC (impersonates via `DynamicClientFromContext` for write/exec/logs). Enabled by default; `--no-mcp` to disable. Tool catalogue + design rationale lives in `internal/mcp/tools.go` + [docs/mcp.md](docs/mcp.md) — don't restate it here. ### Error Handling (Backend) diff --git a/README.md b/README.md index 202d259f5..06d74c1dd 100644 --- a/README.md +++ b/README.md @@ -352,13 +352,13 @@ No auth by default (local use). See the **[Authentication Guide](docs/authentica Radar auto-discovers any CRD in your cluster. Popular tools get [dedicated integrations](docs/integrations.md) with topology edges, detail views, and AI summaries. -**Default chart RBAC** covers the built-in Kubernetes kinds listed below — Workloads, Networking (including NetworkPolicies and PodDisruptionBudgets), Configuration, Storage (PersistentVolumes, PersistentVolumeClaims, StorageClasses), HorizontalPodAutoscalers, ServiceAccounts, LimitRanges, Nodes, Namespaces, and Events. RBAC objects (Roles, ClusterRoles, RoleBindings, ClusterRoleBindings) are opt-in via `rbac.viewRBAC=true`. **CRD-based integrations** (Gateway API, VerticalPodAutoscaler, ArgoCD, FluxCD, cert-manager, etc.) need both the CRD installed in your cluster *and* read access granted — most groups are default-on under `rbac.crdGroups.` (e.g. 
`gatewayApi`, `verticalPodAutoscaler`); check `values.yaml` or add custom rules via `rbac.additionalRules`. +**Default chart RBAC** covers the built-in Kubernetes kinds listed below — Workloads, Networking (including NetworkPolicies and PodDisruptionBudgets), Configuration, Storage (PersistentVolumes, PersistentVolumeClaims, StorageClasses), HorizontalPodAutoscalers, ServiceAccounts, LimitRanges, ResourceQuotas, Nodes, Namespaces, and Events. RBAC objects (Roles, ClusterRoles, RoleBindings, ClusterRoleBindings) are opt-in via `rbac.viewRBAC=true`. **CRD-based integrations** (Gateway API, VerticalPodAutoscaler, ArgoCD, FluxCD, cert-manager, etc.) need both the CRD installed in your cluster *and* read access granted — most groups are default-on under `rbac.crdGroups.` (e.g. `gatewayApi`, `verticalPodAutoscaler`); check `values.yaml` or add custom rules via `rbac.additionalRules`. | Category | Resources | |----------|-----------| | **Workloads** | Deployments, DaemonSets, StatefulSets, ReplicaSets, Pods, Jobs, CronJobs | | **Networking** | Services, Ingresses, NetworkPolicies, Endpoints, PodDisruptionBudgets | -| **Configuration** | ConfigMaps, Secrets (names only, values hidden) | +| **Configuration** | ConfigMaps, Secrets (names only, values hidden), LimitRanges, ResourceQuotas | | **Storage** | PersistentVolumeClaims, PersistentVolumes, StorageClasses | | **Autoscaling** | HorizontalPodAutoscalers, VerticalPodAutoscalers | | **Cluster** | Nodes, Namespaces, ServiceAccounts, Events | diff --git a/deploy/helm/radar/templates/clusterrole.yaml b/deploy/helm/radar/templates/clusterrole.yaml index 3c1f127f0..7fce74b02 100644 --- a/deploy/helm/radar/templates/clusterrole.yaml +++ b/deploy/helm/radar/templates/clusterrole.yaml @@ -22,6 +22,7 @@ rules: - serviceaccounts - endpoints - limitranges + - resourcequotas verbs: ["get", "list", "watch"] - apiGroups: - policy diff --git a/docs/in-cluster.md b/docs/in-cluster.md index c32d64082..d9cb09594 100644 --- 
a/docs/in-cluster.md +++ b/docs/in-cluster.md @@ -213,7 +213,7 @@ rules: - apiGroups: ["", "apps", "batch", "networking.k8s.io"] resources: ["pods", "services", "deployments", "daemonsets", "statefulsets", "replicasets", "jobs", "cronjobs", "configmaps", "events", - "ingresses", "persistentvolumeclaims"] + "ingresses", "persistentvolumeclaims", "resourcequotas"] verbs: ["get", "list", "watch"] - apiGroups: [""] resources: ["pods/log"] diff --git a/docs/integrations.md b/docs/integrations.md index 3979af700..7ca23272c 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -878,7 +878,7 @@ Deferred to a future "full Crossplane" pass: | PolicyReport | `wgpolicyk8s.io/v1alpha2` | — | Yes | Yes | | ClusterPolicyReport | `wgpolicyk8s.io/v1alpha2` | — | Yes | Yes | -PolicyReport findings also surface through the unified `/api/issues` endpoint (and the MCP `issues` tool) when opted in via `source=kyverno` / `include_kyverno=true` — `fail` and `error` results map to `critical`, `warn` maps to `warning`, and `pass` / `skip` are omitted. +PolicyReport findings are policy posture, not live operational failure, so they are **not** part of the `/api/issues` stream. They surface per-resource: the PolicyReport detail view (above) and the `resourceContext` policy rollup on a resource fetched via `get_resource`. (The cluster audit — `/api/audit` + MCP `get_cluster_audit` — is radar's own static best-practice scanner and does **not** include PolicyReport results.) 
--- diff --git a/docs/mcp.md b/docs/mcp.md index 32d1e4fe8..ee4b92f52 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -16,7 +16,7 @@ Radar's MCP server solves these: - **Token-optimized** — resources are minified, stripping noise (managed fields, internal annotations, redundant status) while preserving what matters - **Enriched data** — topology graphs, health assessments, deduplicated events, filtered logs (prioritizing errors/warnings) -- **Safe operations** — read tools are read-only; write tools (restart, scale, sync) are clearly annotated and non-destructive +- **Safe operations** — read tools are read-only (`readOnlyHint`); write tools (restart, scale, rollback, sync, apply, cordon/drain) are RBAC-enforced and annotated `destructiveHint` so AI clients can prompt for confirmation - **Secret-safe** — Secret data is never exposed, environment values are redacted, log output is scrubbed for API keys and tokens - **RBAC-aware** — respects your cluster's RBAC permissions - **Vendor-neutral** — works with any MCP-compatible AI tool @@ -165,17 +165,24 @@ Add to `~/.gemini/settings.json`: | Tool | Description | Parameters | |------|-------------|------------| -| `get_dashboard` | Cluster health overview — resource counts, problems, warning events, Helm status. Includes recent changes correlated with detected problems. | `namespace` (optional) | -| `list_resources` | List resources of a kind with minified summaries (pods, deployments, services, CRDs, etc.) | `kind` (required), `namespace` (optional) | -| `get_resource` | Detailed view of a single resource — minified spec + status + metadata + default-on `resourceContext` (managedBy / exposes / selectedBy / uses / runsOn / issue+audit rollups). Optionally include heavier sidecars (events / metrics / logs). 
| `kind` (required), `namespace` (optional — omit for cluster-scoped kinds: Node, ClusterRole, IngressClass, etc.), `name` (required), `group` (optional, for ambiguous kinds), `include` (optional: `events,metrics,logs`), `context` (optional: `basic` default, `none` for bare minified output) | -| `get_topology` | Topology graph showing resource relationships (nodes and edges). Use `summary` format for LLM-friendly text descriptions of resource chains. | `namespace` (optional), `view` (optional: `traffic` or `resources`), `format` (optional: `graph` or `summary`) | -| `get_events` | Recent Kubernetes events, deduplicated and sorted by recency. Filter by resource kind/name to scope to a specific resource. | `namespace` (optional), `limit` (optional, default 20, max 100), `kind` (optional), `name` (optional) | -| `get_pod_logs` | Filtered pod logs prioritizing errors/warnings, with secret redaction | `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 200) | -| `list_namespaces` | List all namespaces with status | (none) | +| `issues` | "What's broken right now?" — a ranked, curated stream of live operational failures: failing workloads/pods, dangling references, pod-startup blockers (unschedulable / admission-rejected / stuck post-bind), and False CRD conditions. No source filter; each row carries a `source` label sliceable via `filter`. For static posture use `get_cluster_audit`; for raw events use `get_events`. | `namespace` (optional), `severity` (optional: `critical,warning`), `kind` (optional), `filter` (optional CEL), `limit` (optional, default 200, max 1000) | +| `diagnose` | Root-cause one workload (Pod/Deployment/StatefulSet/DaemonSet) in a single call: minified resource + `resourceContext` + current AND previous container logs across its pods + filtered events + a `startupBlockers` section when it can't reach Running. Replaces a `get_resource → events → logs → logs(previous)` chain. 
| `kind` (required), `namespace` (required), `name` (required) | +| `get_dashboard` | Cluster/namespace health overview — resource counts, failing pods, unhealthy workloads, warning events, Helm status. Inventory-style triage before drilling in. | `namespace` (optional) | +| `top_resources` | Live metrics ranked like `kubectl top \| sort`, joined with K8s context (status, restarts, owner, requests/limits). Use for CPU/memory/OOM/load symptoms. | `kind` (optional: `pods` default, `workloads`, `nodes`), `namespace` (optional), `sort` (optional: `cpu` default, `memory`), `limit` (optional, default 20, max 100) | +| `list_resources` | List resources of a kind with minified summaries + per-row `summaryContext` (managedBy / health / issueCount). | `kind` (required), `group` (optional), `namespace` (optional), `context` (optional: default / `none`) | +| `search` | Find resources by content/term match (config keys, env refs, images, label values, CRD fields, status messages). Tokens AND'd; secret values never indexed. Supports `kind:`/`ns:`/`label:`/`image:` modifiers and CEL `filter`. | `query` (required), `filter` (optional CEL), `limit` (optional) | +| `get_resource` | Detailed view of a single resource — minified spec + status + metadata + default-on `resourceContext` (managedBy / exposes / selectedBy / uses / runsOn / issue+audit rollups). Optionally include heavier sidecars (events / metrics). For logs use `get_pod_logs` / `get_workload_logs` / `diagnose`. | `kind` (required), `namespace` (optional — omit for cluster-scoped kinds: Node, ClusterRole, IngressClass, etc.), `name` (required), `group` (optional, for ambiguous kinds), `include` (optional: `events,metrics`), `context` (optional: `basic` default, `none` for bare minified output) | +| `get_topology` | Whole-namespace/cluster topology graph (nodes + edges). Use `summary` format for LLM-friendly text chains. Once you have a suspect root, prefer `get_neighborhood`. 
| `namespace` (optional), `view` (optional: `traffic` or `resources`), `format` (optional: `graph` or `summary`) | +| `get_neighborhood` | BFS-expanded topology neighborhood around one known root — cheaper and clearer than `get_topology` for cross-resource failures (routing, selector/endpoint, refs, owner chains). RBAC-filtered. | `kind` (required), `namespace` (optional), `name` (required), `profile` (optional: `auto` default / `all`), `hops` (optional, default 1, max 2) | +| `get_events` | Recent Kubernetes Warning events, deduplicated and sorted by recency. Filter by resource kind/name to scope. | `namespace` (optional), `limit` (optional, default 20, max 100), `kind` (optional), `name` (optional) | | `get_changes` | Recent resource changes (creates, updates, deletes) from the cluster timeline. Use to investigate what changed before an incident. | `namespace` (optional), `kind` (optional), `name` (optional), `since` (optional, e.g. `1h`, `30m`; default `1h`), `limit` (optional, default 20, max 50) | +| `get_pod_logs` | Filtered pod logs prioritizing errors/warnings, with secret redaction. Set `grep` for server-side filtering. | `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 200), `grep` (optional) | +| `get_workload_logs` | Aggregated, AI-filtered logs from all pods of a workload (Deployment, StatefulSet, DaemonSet) | `kind` (required), `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 100 per pod), `grep` (optional) | +| `get_cluster_audit` | Static config posture — best-practice findings (Security / Reliability / Efficiency) with remediation. INDEPENDENT of operational health; for "what's broken right now?" use `issues`. 
| `namespace` (optional), `category` (optional), `severity` (optional) | +| `list_packages` | Installed packages (Helm releases, label-managed workloads, CRDs, Argo Applications, Flux HelmReleases + Kustomizations) with source provenance, versions, and health, in one call. | `namespace` (optional), `source` (optional), `chart` (optional substring) | | `list_helm_releases` | List all Helm releases with status and health | `namespace` (optional) | | `get_helm_release` | Detailed Helm release info with optional values, history, and manifest diff | `namespace` (required), `name` (required), `include` (optional: `values,history,diff`), `diff_revision_1` (required when `include=diff`) / `diff_revision_2` (optional) | -| `get_workload_logs` | Aggregated, AI-filtered logs from all pods of a workload (Deployment, StatefulSet, DaemonSet) | `kind` (required), `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 100 per pod) | +| `list_namespaces` | List all namespaces with status | (none) | | `get_subject_permissions` | Effective RBAC permissions of a ServiceAccount / User / Group: bindings (each with `inheritedFromGroup` set when applicable), deduplicated flat rule list, and (for SAs) the Pods running as it. Use to answer "is this SA over-privileged?" or "what's the blast radius if this Pod is compromised?" 
| `kind` (required: `ServiceAccount`, `User`, or `Group`), `namespace` (required for ServiceAccount; omit for User/Group), `name` (required) | ### Write Tools @@ -198,7 +205,7 @@ Add to `~/.gemini/settings.json`: ## Security -- **Safe by design** — read tools are strictly read-only; write tools perform non-destructive operations (restart, scale, sync) and are annotated with MCP tool hints so AI clients can distinguish them +- **Safe by design** — read tools are strictly read-only and annotated with `readOnlyHint`; write tools (restart, scale, rollback, sync, apply, cordon/drain) are RBAC-enforced and annotated with `destructiveHint` so AI clients can prompt for confirmation. Some are genuinely destructive — `apply_resource` uses server-side apply with `Force=true` (can take field ownership from Helm/Flux), `manage_node drain` evicts pods, and `rollback`/`terminate` overwrite or abort desired state - **RBAC-aware** — every call enforces RBAC at the same boundary as the REST API: - **Local binary**: the cache uses your kubeconfig identity, so MCP can only see what `kubectl` can see for that user - **In-cluster (auth enabled)**: read tools intersect namespaced reads with the calling user's RBAC-allowed namespaces; cluster-scoped reads (Nodes, PVs, ClusterRoles, cluster-scoped CRDs) are gated per-kind via SubjectAccessReview, so cluster-wide pod visibility doesn't implicitly grant Node read; write tools, exec, and logs are fully impersonated so the apiserver enforces the user's RBAC end-to-end diff --git a/internal/issues/filter_integration_test.go b/internal/issues/filter_integration_test.go index 1afa690b3..5a11e52a8 100644 --- a/internal/issues/filter_integration_test.go +++ b/internal/issues/filter_integration_test.go @@ -2,46 +2,33 @@ package issues import ( "testing" - "time" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" 
"github.com/skyhook-io/radar/internal/filter" "github.com/skyhook-io/radar/internal/k8s" ) // Filter integration tests — exercise ComposeWithStats with a compiled -// CEL filter, covering match/drop, eval-error stats, and the source- +// CEL filter, covering match/drop, eval-error stats, and the // post-filter ordering invariant that limit applies last. -func TestCompose_WithCELFilter_FiltersByCount(t *testing.T) { - // Mix of low-count problems and high-count events; `count > 5` - // should keep only the events. - now := time.Now() +func TestCompose_WithCELFilter_MatchesAndDrops(t *testing.T) { + // Two problem rows; a reason predicate should keep only the match. p := &fakeProvider{ problems: []k8s.Problem{ - {Kind: "Pod", Name: "p1", Severity: "critical", Reason: "x"}, - }, - events: []*corev1.Event{ - { - ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "evt-1"}, - InvolvedObject: corev1.ObjectReference{Kind: "Pod", Name: "evt-pod"}, - Reason: "BackOff", - Type: corev1.EventTypeWarning, - FirstTimestamp: metav1.Time{Time: now.Add(-2 * time.Minute)}, - LastTimestamp: metav1.Time{Time: now.Add(-1 * time.Minute)}, - Count: 10, - }, + {Kind: "Pod", Namespace: "ns", Name: "crash", Severity: "critical", Reason: "CrashLoopBackOff"}, + {Kind: "Pod", Namespace: "ns", Name: "oom", Severity: "critical", Reason: "OOMKilled"}, }, } - f, err := filter.CompileIssueFilter(`count > 5`) + f, err := filter.CompileIssueFilter(`reason == "OOMKilled"`) if err != nil { t.Fatal(err) } - out, stats := ComposeWithStats(p, Filters{Filter: f, IncludeEvents: true}) - if len(out) != 1 || out[0].Name != "evt-pod" { - t.Fatalf("expected single event-source hit, got %+v", out) + out, stats := ComposeWithStats(p, Filters{Filter: f}) + if len(out) != 1 || out[0].Name != "oom" { + t.Fatalf("expected single OOMKilled hit, got %+v", out) } if stats.FilterErrors != 0 { t.Errorf("clean filter, expected no eval errors, got %d", stats.FilterErrors) @@ -72,6 +59,35 @@ func 
TestCompose_FilterAppliedBeforeLimit(t *testing.T) { } } +func TestCompose_WithCELFilter_SourceBinding(t *testing.T) { + // The `source=` query param was removed; the CEL `source` binding is now + // the ONLY way to slice issues by detector (documented migration path in + // the HTTP handler + MCP tool schema). Guard that the binding exists and + // slices correctly across two distinct sources. + gvr := schema.GroupVersionResource{Group: "argoproj.io", Version: "v1alpha1", Resource: "applications"} + app := &unstructured.Unstructured{Object: map[string]any{ + "apiVersion": "argoproj.io/v1alpha1", + "kind": "Application", + "metadata": map[string]any{"name": "my-app", "namespace": "argocd"}, + "status": map[string]any{"conditions": []any{ + map[string]any{"type": "Synced", "status": "False", "reason": "OutOfSync", "message": "drift"}, + }}, + }} + p := &fakeProvider{ + problems: []k8s.Problem{{Kind: "Deployment", Namespace: "argocd", Name: "api", Severity: "critical", Reason: "down"}}, + dynamic: map[schema.GroupVersionResource][]*unstructured.Unstructured{gvr: {app}}, + kinds: map[schema.GroupVersionResource]string{gvr: "Application"}, + } + f, err := filter.CompileIssueFilter(`source == "condition"`) + if err != nil { + t.Fatal(err) + } + out, _ := ComposeWithStats(p, Filters{Filter: f}) + if len(out) != 1 || out[0].Source != SourceCondition { + t.Fatalf("source==\"condition\" should keep only the condition row, got %+v", out) + } +} + func TestCompose_FilterEvalError_StatsPopulated(t *testing.T) { // Reference an unbound-but-syntactically-valid path that won't // resolve on any actual issue row — the dyn-typed env declares diff --git a/internal/issues/issues.go b/internal/issues/issues.go index 2ef7da86e..72b09354e 100644 --- a/internal/issues/issues.go +++ b/internal/issues/issues.go @@ -6,13 +6,11 @@ import ( "strings" "time" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" 
"github.com/skyhook-io/radar/internal/k8s" bp "github.com/skyhook-io/radar/pkg/audit" - "github.com/skyhook-io/radar/pkg/policyreports" ) // Provider abstracts the data sources Compose needs. Implementations @@ -27,23 +25,16 @@ type Provider interface { // can filter the "direct config error" category separately from the // workload-state-based SourceProblem signals. DetectMissingRefs(namespaces []string) []k8s.Problem - WarningEvents(namespaces []string, since time.Duration) []*corev1.Event + // DetectScheduling returns placement/admission/post-bind failures — + // unschedulable Pods (with the offending node constraint resolved), + // admission rejections (quota/LimitRange/PodSecurity/webhook, where no + // Pod exists), and pods stuck post-bind (CNI/volume). Surfaced under + // SourceScheduling so agents/UI can isolate "why won't this run". + DetectScheduling(namespaces []string) []k8s.Problem // CRD-condition fallback inputs. WatchedDynamic() []schema.GroupVersionResource ListDynamic(gvr schema.GroupVersionResource, namespace string) ([]*unstructured.Unstructured, error) KindForGVR(gvr schema.GroupVersionResource) string - // KyvernoFindings returns every subject + findings pair currently - // indexed from PolicyReport / ClusterPolicyReport documents. Returns - // nil when Kyverno is not installed (the common case) — callers - // must treat nil as "no findings to surface" rather than an error. - KyvernoFindings() []policyreports.SubjectFindings - // KyvernoStatus reports the PolicyReport index lifecycle phase so - // callers can distinguish "Kyverno not installed" from "warmup - // deferred (cluster too large)" from "warmup in flight" from "ready - // but empty". See k8s.KyvernoStatus for the enum values. Returned as - // a plain string so callers in this package don't need to import - // internal/k8s just to read the value. 
- KyvernoStatus() string } type dynamicScopeProvider interface { @@ -66,8 +57,8 @@ type ComposeStats struct { TotalMatched int } -// Compose runs the four sources and merges their output. Backward- -// compatible signature for callers that don't care about stats. +// Compose runs the curated operational sources and merges their output. +// Backward-compatible signature for callers that don't care about stats. func Compose(p Provider, f Filters) []Issue { out, _ := ComposeWithStats(p, f) return out @@ -96,71 +87,48 @@ func ComposeWithStats(p Provider, f Filters) ([]Issue, ComposeStats) { out := make([]Issue, 0, 64) now := time.Now() + // issues = "what's broken right now" — the curated operational + // sources, always composed. Raw Warning events live in get_events / + // the timeline; Kyverno / policy posture lives with audit/compliance; + // static best-practice findings live in audit. None of those belong in + // the live-failure stream, so they are deliberately NOT sources here. + // `source` survives only as an output label on each row (+ CEL filter), + // not as an input filter — detection provenance is not a triage axis. + // ---- Source: problem (radar's hardcoded checks) ----------------- - if wantSource(f, SourceProblem) { - for _, p := range p.DetectProblems(f.Namespaces) { - out = append(out, fromProblem(p, now, SourceProblem)) - } - for _, p := range p.DetectCAPIProblems(f.Namespaces) { - out = append(out, fromProblem(p, now, SourceProblem)) - } + for _, p := range p.DetectProblems(f.Namespaces) { + out = append(out, fromProblem(p, now, SourceProblem)) + } + for _, p := range p.DetectCAPIProblems(f.Namespaces) { + out = append(out, fromProblem(p, now, SourceProblem)) } // ---- Source: missing_ref (dangling-ref detection) -------------- // Direct by-name reference targets that don't exist (Pod → missing // PVC / CM / Secret / SA, HPA → missing scaleTargetRef, Ingress → - // missing backend Service, etc.). 
Same Problem shape as SourceProblem - // rows, separate Source so callers can filter "direct config errors" - // from "workload-state problems." - if wantSource(f, SourceMissingRef) { - for _, p := range p.DetectMissingRefs(f.Namespaces) { - out = append(out, fromProblem(p, now, SourceMissingRef)) - } - } - - // ---- Source: condition (generic CRD .status.conditions fallback) ---- - if wantSource(f, SourceCondition) { - out = append(out, detectGenericCRDIssues(p, f)...) + // missing backend Service, etc.). + for _, p := range p.DetectMissingRefs(f.Namespaces) { + out = append(out, fromProblem(p, now, SourceMissingRef)) } - // ---- Source: kyverno (PolicyReport findings) ------------------- - // Off by default, mirroring audit. Kyverno emits findings per - // (policy, rule, subject) tuple and a baseline PSS profile alone - // produces 10+ rows per workload — surfacing them in the default - // Issue view would drown the operator-actionable signals. Opt in - // via IncludeKyverno or source=kyverno. - if f.IncludeKyverno && wantSource(f, SourceKyverno) { - for _, sf := range p.KyvernoFindings() { - if !subjectInNamespaces(sf.Subject, f.Namespaces) { - continue - } - for _, fin := range sf.Findings { - if issue, ok := fromKyverno(sf.Subject, fin, now); ok { - out = append(out, issue) - } - } - } + // ---- Source: scheduling (placement + admission + post-bind) ----- + // Why a Pod can't reach Running, decomposed: unschedulable (with the + // offending node label/taint named), admission-rejected (quota/ + // PodSecurity/webhook — no Pod object exists), or stuck post-bind + // (CNI/volume). + for _, p := range p.DetectScheduling(f.Namespaces) { + out = append(out, fromProblem(p, now, SourceScheduling)) } - // ---- Source: event (recent K8s Warning events) ----------------- - // Gated by IncludeEvents. 
Events are the noisiest source by far - // on real clusters (each broken Pod - // emits a Warning Event every few seconds, retained for the cache - // window) and almost always duplicate signal already surfaced by - // SourceProblem. Default-off keeps the Issue count aligned with - // the per-cluster "X problems" intuition; user opts in via - // include_events=true or by passing "event" in source=. - if f.IncludeEvents && wantSource(f, SourceEvent) { - for _, e := range p.WarningEvents(f.Namespaces, f.Since) { - out = append(out, fromWarningEvent(e)) - } - } + // ---- Source: condition (generic CRD .status.conditions fallback) ---- + out = append(out, detectGenericCRDIssues(p, f)...) // Apply remaining filters (severity, kind, namespace) post-compose // since each source has its own native filtering surface and // pushing filters down individually would multiply branching. out = applyFilters(out, f) out = applyClusterScopedAccess(out, f) + out = dedupePodSchedulingOverProblem(out) // Optional CEL filter — evaluated last so it sees the normalized // row shape. Eval errors count as non-match (matches "missing @@ -371,129 +339,44 @@ func fromProblem(p k8s.Problem, now time.Time, source Source) Issue { Reason: p.Reason, Message: p.Message, FirstSeen: since, - LastSeen: now, + LastSeen: now, Count: 1, RestartCount: p.RestartCount, LastTerminatedReason: p.LastTerminatedReason, } } -// fromKyverno maps a single PolicyReport Finding into an Issue. The -// second return is false when the finding's result is not a violation -// we surface (pass / skip / unknown verdicts produce no Issue). -// -// Severity mapping is by Kyverno's `result` field — NOT by the report's -// `severity` field. Rationale: `severity` is a free-form string set by -// policy authors (e.g. "high", "medium", "low", or empty), inconsistent -// across policies, and not aligned with the operator-actionable axis we -// expose to consumers. 
The `result` enum is authoritative on whether -// the engine considered the subject in violation, which is what the -// Issue list represents. -// -// fail → SeverityCritical (policy actively rejected the subject) -// warn → SeverityWarning (policy flagged but did not block) -// error → SeverityCritical (engine could not evaluate; operator needs to know) -// pass / skip / other → omitted -func fromKyverno(subj policyreports.Subject, fin policyreports.Finding, now time.Time) (Issue, bool) { - var sev Severity - switch strings.ToLower(fin.Result) { - case "fail", "error": - sev = SeverityCritical - case "warn": - sev = SeverityWarning - default: - return Issue{}, false - } - return Issue{ - Severity: sev, - Source: SourceKyverno, - Kind: subj.Kind, - Group: subj.Group, - Namespace: subj.Namespace, - Name: subj.Name, - Reason: fin.Policy, - Message: fin.Message, - FirstSeen: now, - LastSeen: now, - Count: 1, - }, true -} - -// subjectInNamespaces reports whether a Kyverno subject should pass the -// namespace filter. Empty Namespaces means "all namespaces"; cluster- -// scoped subjects (Namespace == "") always pass — they're gated later -// by CanReadClusterScoped. -func subjectInNamespaces(subj policyreports.Subject, namespaces []string) bool { - if len(namespaces) == 0 || subj.Namespace == "" { - return true - } - for _, ns := range namespaces { - if ns == subj.Namespace { - return true - } - } - return false -} - -// fromWarningEvent maps a K8s Warning event to an Issue. Severity is -// always `warning`; events don't ship a severity scale that maps cleanly -// to our `critical` tier (a CrashLoopBackOff event coexists with the -// problem-source `critical` Deployment issue, so we don't double-amplify). 
-func fromWarningEvent(e *corev1.Event) Issue { - first := e.FirstTimestamp.Time - last := e.LastTimestamp.Time - if last.IsZero() { - last = e.EventTime.Time - } - if first.IsZero() { - first = last - } - // Event.InvolvedObject carries apiVersion (group/version); split out - // the group so cross-group consumers don't collide when a Knative - // Service and a core Service share name+ns. - group, _, _ := strings.Cut(e.InvolvedObject.APIVersion, "/") - if e.InvolvedObject.APIVersion != "" && !strings.Contains(e.InvolvedObject.APIVersion, "/") { - // "v1" → core group "". - group = "" - } - return Issue{ - Severity: SeverityWarning, - Source: SourceEvent, - Kind: e.InvolvedObject.Kind, - Group: resolveGroup(group, e.InvolvedObject.Kind), - Namespace: e.Namespace, - Name: e.InvolvedObject.Name, - Reason: e.Reason, - Message: e.Message, - FirstSeen: first, - LastSeen: last, - Count: int(e.Count), - } -} - // --------------------------------------------------------------------------- // Filter + sort helpers // --------------------------------------------------------------------------- -// wantSource implements the documented `source=` contract: it is a FILTER, -// not an additive opt-in. When Filters.Sources is empty, every source is -// allowed (defaults are then narrowed elsewhere — e.g. event / kyverno -// collection only runs when the matching IncludeX flag is set). -// When Filters.Sources is non-empty, only the listed sources pass through; -// passing source=kyverno therefore returns ONLY Kyverno rows, not -// "defaults plus Kyverno". Callers that want "defaults plus X" should use -// the include_X flags instead (the HTTP handler translates include_X=true -// into both IncludeX=true AND leaves Sources empty, so the defaults stay). -func wantSource(f Filters, s Source) bool { - if len(f.Sources) == 0 { - return true +// dedupePodSchedulingOverProblem drops the generic problem-source row for a +// Pod when the scheduling source emitted one for the same Pod. 
A pod stuck +// post-bind (ContainerCreating on a CNI/volume stall) trips both: DetectProblems +// flags it Pending>5m and DetectPostBindProblems names the actual blocker. The +// scheduling row is strictly richer, so it wins. (Bind-time unschedulable pods +// are already skipped in DetectProblems, so this only fires on the post-bind +// overlap.) A plain DetectProblems skip can't replace this — the problem +// threshold is 5m but the post-bind event window is 10m, so a pod stuck >10m +// would lose its only row. +func dedupePodSchedulingOverProblem(in []Issue) []Issue { + schedPods := map[string]bool{} + for _, i := range in { + if i.Source == SourceScheduling && i.Kind == "Pod" { + schedPods[i.Namespace+"/"+i.Name] = true + } + } + if len(schedPods) == 0 { + return in } - for _, want := range f.Sources { - if want == s { - return true + out := in[:0] + for _, i := range in { + if i.Source == SourceProblem && i.Kind == "Pod" && schedPods[i.Namespace+"/"+i.Name] { + continue } + out = append(out, i) } - return false + return out } func applyFilters(in []Issue, f Filters) []Issue { diff --git a/internal/issues/issues_test.go b/internal/issues/issues_test.go index 5d0da7782..4c252f0dc 100644 --- a/internal/issues/issues_test.go +++ b/internal/issues/issues_test.go @@ -2,17 +2,13 @@ package issues import ( "sort" - "strings" "testing" "time" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "github.com/skyhook-io/radar/internal/k8s" - "github.com/skyhook-io/radar/pkg/policyreports" ) // fakeProvider — minimal Provider for unit testing. 
Each field @@ -21,21 +17,17 @@ import ( type fakeProvider struct { problems []k8s.Problem missingRefs []k8s.Problem + scheduling []k8s.Problem capiProblems []k8s.Problem - events []*corev1.Event dynamic map[schema.GroupVersionResource][]*unstructured.Unstructured kinds map[schema.GroupVersionResource]string namespaced map[schema.GroupVersionResource]bool - kyverno []policyreports.SubjectFindings - kyvernoStat string } func (f *fakeProvider) DetectProblems(_ []string) []k8s.Problem { return f.problems } func (f *fakeProvider) DetectMissingRefs(_ []string) []k8s.Problem { return f.missingRefs } +func (f *fakeProvider) DetectScheduling(_ []string) []k8s.Problem { return f.scheduling } func (f *fakeProvider) DetectCAPIProblems(_ []string) []k8s.Problem { return f.capiProblems } -func (f *fakeProvider) WarningEvents(_ []string, _ time.Duration) []*corev1.Event { - return f.events -} func (f *fakeProvider) WatchedDynamic() []schema.GroupVersionResource { out := make([]schema.GroupVersionResource, 0, len(f.dynamic)) for g := range f.dynamic { @@ -53,12 +45,6 @@ func (f *fakeProvider) NamespacedForGVR(gvr schema.GroupVersionResource) (bool, namespaced, ok := f.namespaced[gvr] return namespaced, ok } -func (f *fakeProvider) KyvernoFindings() []policyreports.SubjectFindings { - return f.kyverno -} -func (f *fakeProvider) KyvernoStatus() string { - return f.kyvernoStat -} func TestCompose_NormalizesProblemSeverity(t *testing.T) { p := &fakeProvider{ @@ -81,59 +67,82 @@ func TestCompose_NormalizesProblemSeverity(t *testing.T) { } } -func TestCompose_WarningEventsIncluded(t *testing.T) { - now := time.Now() +func TestCompose_PodSchedulingWinsOverProblem(t *testing.T) { + // A pod stuck post-bind trips both sources: DetectProblems flags it + // Pending>5m and DetectScheduling names the actual CNI/volume blocker. + // The scheduling row is richer, so the generic problem row for the SAME + // pod must be dropped — without collapsing unrelated rows. 
p := &fakeProvider{ - events: []*corev1.Event{ - { - ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "evt-1"}, - InvolvedObject: corev1.ObjectReference{Kind: "Pod", Name: "p"}, - Reason: "FailedMount", - Message: "could not mount volume", - Type: corev1.EventTypeWarning, - FirstTimestamp: metav1.Time{Time: now.Add(-2 * time.Minute)}, - LastTimestamp: metav1.Time{Time: now.Add(-1 * time.Minute)}, - Count: 5, - }, + problems: []k8s.Problem{ + {Kind: "Pod", Namespace: "ns", Name: "stuck", Severity: "high", Reason: "Pending"}, + {Kind: "Pod", Namespace: "ns", Name: "other", Severity: "high", Reason: "CrashLoopBackOff"}, + {Kind: "Deployment", Namespace: "ns", Name: "stuck", Severity: "critical", Reason: "down"}, + }, + scheduling: []k8s.Problem{ + {Kind: "Pod", Namespace: "ns", Name: "stuck", Severity: "high", Reason: "VolumeMount"}, }, } - // Events are opt-in; IncludeEvents=true is required to surface them - // from Compose. The default-off behavior is covered separately by - // TestCompose_EventsExcludedByDefault. - out := Compose(p, Filters{IncludeEvents: true}) - if len(out) != 1 { - t.Fatalf("got %d issues", len(out)) + out := Compose(p, Filters{}) + + var stuckPodRows []Issue + for _, i := range out { + if i.Kind == "Pod" && i.Name == "stuck" { + stuckPodRows = append(stuckPodRows, i) + } } - if out[0].Source != SourceEvent { - t.Fatalf("expected source=event, got %s", out[0].Source) + if len(stuckPodRows) != 1 { + t.Fatalf("expected exactly 1 row for Pod ns/stuck (scheduling wins), got %d: %+v", len(stuckPodRows), out) } - if out[0].Count != 5 { - t.Fatalf("count not propagated: %d", out[0].Count) + if stuckPodRows[0].Source != SourceScheduling || stuckPodRows[0].Reason != "VolumeMount" { + t.Errorf("the surviving Pod row should be the scheduling one, got %+v", stuckPodRows[0]) + } + // The unrelated problem-source pod and the same-name Deployment must + // survive — dedup keys on (source=problem, kind=Pod, ns/name) only. 
+ var sawOtherPod, sawDeploy bool + for _, i := range out { + if i.Kind == "Pod" && i.Name == "other" { + sawOtherPod = true + } + if i.Kind == "Deployment" && i.Name == "stuck" { + sawDeploy = true + } + } + if !sawOtherPod { + t.Errorf("unrelated problem-source Pod must not be dropped: %+v", out) + } + if !sawDeploy { + t.Errorf("same-name Deployment must not be dropped by Pod dedup: %+v", out) } } -func TestCompose_EventsExcludedByDefault(t *testing.T) { - // The default Compose call must NOT surface warning events. Pins - // the opt-in contract so a future refactor doesn't silently - // re-enable the event flood on noisy clusters. - now := time.Now() +func TestCompose_SchedulingComposedByDefault(t *testing.T) { + countSource := func(in []Issue, s Source) int { + n := 0 + for _, i := range in { + if i.Source == s { + n++ + } + } + return n + } p := &fakeProvider{ - events: []*corev1.Event{{ - ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "evt-1"}, - InvolvedObject: corev1.ObjectReference{Kind: "Pod", Name: "p"}, - Reason: "FailedMount", - Type: corev1.EventTypeWarning, - LastTimestamp: metav1.Time{Time: now}, - Count: 1, - }}, + problems: []k8s.Problem{ + {Kind: "Deployment", Namespace: "prod", Name: "api", Severity: "critical", Reason: "Unavailable"}, + }, + scheduling: []k8s.Problem{ + {Kind: "Pod", Namespace: "prod", Name: "web-x", Severity: "high", Reason: "Unschedulable", Message: "no node has kubernetes.io/arch=arm64"}, + }, } + + // Both curated sources compose unconditionally; each row carries its + // source label for CEL/UI grouping. 
out := Compose(p, Filters{}) - if len(out) != 0 { - t.Fatalf("event leaked through default Compose: %+v", out) + if countSource(out, SourceScheduling) != 1 || countSource(out, SourceProblem) != 1 { + t.Fatalf("Compose should include problem + scheduling, got %+v", out) } } -func TestCompose_MissingRefsDefaultAndSourceFilter(t *testing.T) { +func TestCompose_MissingRefsComposedByDefault(t *testing.T) { p := &fakeProvider{ problems: []k8s.Problem{ {Kind: "Service", Namespace: "prod", Name: "api", Severity: "warning", Reason: "Selector matches no pods"}, @@ -145,17 +154,7 @@ func TestCompose_MissingRefsDefaultAndSourceFilter(t *testing.T) { out := Compose(p, Filters{}) if !hasIssueSource(out, SourceProblem) || !hasIssueSource(out, SourceMissingRef) { - t.Fatalf("default Compose should include problem + missing_ref, got %+v", out) - } - - out = Compose(p, Filters{Sources: []Source{SourceMissingRef}}) - if len(out) != 1 || out[0].Source != SourceMissingRef || out[0].Reason != "Missing PVC" { - t.Fatalf("source=missing_ref should return only missing refs, got %+v", out) - } - - out = Compose(p, Filters{Sources: []Source{SourceProblem}}) - if len(out) != 1 || out[0].Source != SourceProblem || out[0].Reason != "Selector matches no pods" { - t.Fatalf("source=problem should exclude missing refs, got %+v", out) + t.Fatalf("Compose should include problem + missing_ref, got %+v", out) } } @@ -216,7 +215,7 @@ func TestCompose_CAPIGroupSkippedByGenericFallback(t *testing.T) { dynamic: map[schema.GroupVersionResource][]*unstructured.Unstructured{gvr: {cl}}, kinds: map[schema.GroupVersionResource]string{gvr: "Cluster"}, } - out := Compose(p, Filters{Sources: []Source{SourceCondition}}) + out := Compose(p, Filters{}) if len(out) != 0 { t.Fatalf("CAPI should be skipped by generic fallback: %+v", out) } @@ -263,7 +262,6 @@ func TestCompose_DropsUnauthorizedClusterScopedCRDConditions(t *testing.T) { namespaced: map[schema.GroupVersionResource]bool{gvr: false}, } out := Compose(p, 
Filters{ - Sources: []Source{SourceCondition}, CanReadClusterScoped: func(kind, group string) bool { if kind != "NodePool" || group != "karpenter.sh" { t.Fatalf("unexpected cluster-scoped check: kind=%q group=%q", kind, group) @@ -276,221 +274,6 @@ func TestCompose_DropsUnauthorizedClusterScopedCRDConditions(t *testing.T) { } } -func TestCompose_KyvernoExcludedByDefault(t *testing.T) { - p := &fakeProvider{ - kyverno: []policyreports.SubjectFindings{{ - Subject: policyreports.Subject{Kind: "Pod", Namespace: "prod", Name: "web"}, - Findings: []policyreports.Finding{ - {Policy: "require-resource-limits", Result: "fail", Message: "missing cpu limit"}, - }, - }}, - } - out := Compose(p, Filters{}) - for _, i := range out { - if i.Source == SourceKyverno { - t.Fatalf("kyverno should be excluded by default, got: %+v", i) - } - } -} - -func TestCompose_KyvernoIncludedWhenOptedIn(t *testing.T) { - p := &fakeProvider{ - kyverno: []policyreports.SubjectFindings{{ - Subject: policyreports.Subject{Kind: "Pod", Namespace: "prod", Name: "web"}, - Findings: []policyreports.Finding{ - {Policy: "require-resource-limits", Result: "fail", Message: "missing cpu limit"}, - }, - }}, - } - out := Compose(p, Filters{IncludeKyverno: true}) - if len(out) != 1 { - t.Fatalf("got %d issues, want 1: %+v", len(out), out) - } - got := out[0] - if got.Source != SourceKyverno { - t.Fatalf("source: %s", got.Source) - } - if got.Severity != SeverityCritical { - t.Fatalf("fail should map to critical, got %s", got.Severity) - } - if got.Kind != "Pod" || got.Namespace != "prod" || got.Name != "web" { - t.Fatalf("subject not propagated: %+v", got) - } - if got.Reason != "require-resource-limits" { - t.Fatalf("reason should be policy name, got %q", got.Reason) - } - if got.Message != "missing cpu limit" { - t.Fatalf("message not propagated: %q", got.Message) - } - if got.Count != 1 { - t.Fatalf("count should be 1, got %d", got.Count) - } -} - -func TestCompose_KyvernoSeverityMapping(t *testing.T) { - // 
fail/error → critical, warn → warning, pass/skip → omitted. - p := &fakeProvider{ - kyverno: []policyreports.SubjectFindings{{ - Subject: policyreports.Subject{Kind: "Pod", Namespace: "ns", Name: "p"}, - Findings: []policyreports.Finding{ - {Policy: "p1", Rule: "r1", Result: "fail", Message: "fail msg"}, - {Policy: "p2", Rule: "r2", Result: "warn", Message: "warn msg"}, - {Policy: "p3", Rule: "r3", Result: "error", Message: "error msg"}, - {Policy: "p4", Rule: "r4", Result: "pass", Message: "pass msg"}, - {Policy: "p5", Rule: "r5", Result: "skip", Message: "skip msg"}, - }, - }}, - } - out := Compose(p, Filters{IncludeKyverno: true}) - bySev := map[Severity]int{} - for _, i := range out { - bySev[i.Severity]++ - } - if bySev[SeverityCritical] != 2 { - t.Fatalf("expected 2 critical (fail+error), got %d: %+v", bySev[SeverityCritical], out) - } - if bySev[SeverityWarning] != 1 { - t.Fatalf("expected 1 warning, got %d: %+v", bySev[SeverityWarning], out) - } - // pass + skip must not appear. - for _, i := range out { - if strings.Contains(i.Message, "pass msg") || strings.Contains(i.Message, "skip msg") { - t.Fatalf("pass/skip leaked into issues: %+v", i) - } - } -} - -func TestCompose_KyvernoNamespaceFilter(t *testing.T) { - p := &fakeProvider{ - kyverno: []policyreports.SubjectFindings{ - { - Subject: policyreports.Subject{Kind: "Pod", Namespace: "prod", Name: "web"}, - Findings: []policyreports.Finding{{Policy: "p1", Result: "fail"}}, - }, - { - Subject: policyreports.Subject{Kind: "Pod", Namespace: "dev", Name: "api"}, - Findings: []policyreports.Finding{{Policy: "p2", Result: "fail"}}, - }, - { - // Cluster-scoped: namespace filter must NOT drop this. 
- Subject: policyreports.Subject{Kind: "ClusterRole", Namespace: "", Name: "admin"}, - Findings: []policyreports.Finding{{Policy: "p3", Result: "warn"}}, - }, - }, - } - out := Compose(p, Filters{ - IncludeKyverno: true, - Namespaces: []string{"prod"}, - }) - gotByName := map[string]bool{} - for _, i := range out { - gotByName[i.Name] = true - } - if !gotByName["web"] { - t.Fatalf("prod/web should appear: %+v", out) - } - if gotByName["api"] { - t.Fatalf("dev/api should be filtered out: %+v", out) - } - if !gotByName["admin"] { - t.Fatalf("cluster-scoped subject should pass namespace filter: %+v", out) - } -} - -func TestCompose_KyvernoNilFindingsGraceful(t *testing.T) { - // PolicyReport index returns nil when Kyverno is not installed — - // that's the common case and must not produce issues or errors. - p := &fakeProvider{kyverno: nil} - out := Compose(p, Filters{IncludeKyverno: true}) - for _, i := range out { - if i.Source == SourceKyverno { - t.Fatalf("nil findings should not produce kyverno issues: %+v", i) - } - } -} - -// TestCompose_KyvernoGroupPropagated pins that fromKyverno wires the -// Subject.Group into Issue.Group. Without this, agents and the SPA can't -// tell which CRD a finding belongs to when the Kind is ambiguous (e.g. -// argoproj.io/Application vs another vendor's Application), and the -// SAR-backed CanReadClusterScoped check would query the wrong group. -func TestCompose_KyvernoGroupPropagated(t *testing.T) { - p := &fakeProvider{ - kyverno: []policyreports.SubjectFindings{ - { - Subject: policyreports.Subject{ - Group: "argoproj.io", - Kind: "Application", - Namespace: "prod", - Name: "myapp", - }, - Findings: []policyreports.Finding{ - {Policy: "no-sync-loop", Result: "fail", Message: "sync loop"}, - }, - }, - { - // Core kind: empty group must pass through (not silently - // replaced with anything else). 
- Subject: policyreports.Subject{ - Group: "", - Kind: "Pod", - Namespace: "prod", - Name: "web", - }, - Findings: []policyreports.Finding{ - {Policy: "require-resource-limits", Result: "fail"}, - }, - }, - }, - } - out := Compose(p, Filters{IncludeKyverno: true}) - if len(out) != 2 { - t.Fatalf("expected 2 issues, got %d: %+v", len(out), out) - } - byKind := map[string]Issue{} - for _, i := range out { - byKind[i.Kind] = i - } - if app, ok := byKind["Application"]; !ok || app.Group != "argoproj.io" { - t.Errorf("Application Group not propagated: %+v", app) - } - if pod, ok := byKind["Pod"]; !ok || pod.Group != "" { - t.Errorf("Pod Group should be empty: %+v", pod) - } -} - -func TestCompose_KyvernoSourceListNarrowsButDoesNotOptIn(t *testing.T) { - // Pins the documented contract: `Sources` is a FILTER, not an - // additive opt-in. The list narrows the response to the named - // sources but does NOT enable collection of noisy sources — - // IncludeKyverno (set by the HTTP/MCP handlers) is what gates - // kyverno emission. With Sources={kyverno} and IncludeKyverno=false - // the response is empty. With IncludeKyverno=true the response is - // kyverno-only (problem source is filtered out because it isn't in - // Sources) — i.e. source=kyverno returns ONLY kyverno rows, not - // "defaults plus kyverno". - p := &fakeProvider{ - kyverno: []policyreports.SubjectFindings{{ - Subject: policyreports.Subject{Kind: "Pod", Namespace: "ns", Name: "p"}, - Findings: []policyreports.Finding{{Policy: "p1", Result: "fail"}}, - }}, - problems: []k8s.Problem{ - {Kind: "Pod", Namespace: "ns", Name: "p", Severity: "critical", Reason: "x"}, - }, - } - // Sources={kyverno} but IncludeKyverno=false → no kyverno emission, - // and problem source filtered out → empty. - out := Compose(p, Filters{Sources: []Source{SourceKyverno}}) - if len(out) != 0 { - t.Fatalf("expected 0 issues without IncludeKyverno, got %+v", out) - } - // With IncludeKyverno=true and Sources={kyverno} → only kyverno. 
- out = Compose(p, Filters{Sources: []Source{SourceKyverno}, IncludeKyverno: true}) - if len(out) != 1 || out[0].Source != SourceKyverno { - t.Fatalf("expected only kyverno, got %+v", out) - } -} - func TestCompose_SeveritySortedDescending(t *testing.T) { p := &fakeProvider{ problems: []k8s.Problem{ diff --git a/internal/issues/parse.go b/internal/issues/parse.go deleted file mode 100644 index ce3ffe273..000000000 --- a/internal/issues/parse.go +++ /dev/null @@ -1,46 +0,0 @@ -package issues - -import ( - "fmt" - "strings" -) - -// ParseSources parses a comma-separated `source=` list into the typed -// Source slice. Shared between the REST handler (/api/issues) and the -// MCP issues tool — both accept the same source vocabulary and reject -// the same removed values, so keeping the parser in one place avoids -// the two surfaces drifting on what's recognized. -// -// "audit" is explicitly rejected with a redirect message: audit findings -// are static config posture (a separate axis from live operational -// state) and live behind /api/audit / MCP get_cluster_audit. Combining -// them inside the issues source filter is the conflation that drove the -// B7 bench failure. 
-func ParseSources(v string) ([]Source, error) { - if v == "" { - return nil, nil - } - parts := strings.Split(v, ",") - out := make([]Source, 0, len(parts)) - for _, p := range parts { - switch strings.ToLower(strings.TrimSpace(p)) { - case "": - continue - case "problem": - out = append(out, SourceProblem) - case "missing_ref": - out = append(out, SourceMissingRef) - case "event": - out = append(out, SourceEvent) - case "condition": - out = append(out, SourceCondition) - case "kyverno": - out = append(out, SourceKyverno) - case "audit": - return nil, fmt.Errorf("source=audit was removed — use GET /api/audit (or MCP get_cluster_audit) for static best-practice findings; issues now covers live operational state only") - default: - return nil, fmt.Errorf("unknown source %q (want: problem, missing_ref, event, condition, kyverno)", p) - } - } - return out, nil -} diff --git a/internal/issues/provider.go b/internal/issues/provider.go index a5b4edc7d..09c0f6356 100644 --- a/internal/issues/provider.go +++ b/internal/issues/provider.go @@ -1,15 +1,10 @@ package issues import ( - "time" - - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" "github.com/skyhook-io/radar/internal/k8s" - "github.com/skyhook-io/radar/pkg/policyreports" ) // CacheProvider adapts radar's in-process caches to the Provider @@ -65,6 +60,26 @@ func (p *CacheProvider) DetectMissingRefs(namespaces []string) []k8s.Problem { return flattenNamespacedProblems(perNs) } +// DetectScheduling fans the three scheduling detectors (bind-time, +// admission, post-bind) across namespaces. All rows are namespaced, so the +// flattenNamespacedProblems convention applies unchanged. 
+func (p *CacheProvider) DetectScheduling(namespaces []string) []k8s.Problem { + detect := func(ns string) []k8s.Problem { + out := k8s.DetectSchedulingProblems(p.cache, ns) + out = append(out, k8s.DetectAdmissionProblems(p.cache, ns)...) + out = append(out, k8s.DetectPostBindProblems(p.cache, ns)...) + return out + } + if len(namespaces) == 0 { + return detect("") + } + perNs := make([][]k8s.Problem, 0, len(namespaces)) + for _, ns := range namespaces { + perNs = append(perNs, detect(ns)) + } + return flattenNamespacedProblems(perNs) +} + func (p *CacheProvider) DetectCAPIProblems(namespaces []string) []k8s.Problem { if p.dynamic == nil || p.discovery == nil { return nil @@ -102,53 +117,6 @@ func flattenNamespacedProblems(perNs [][]k8s.Problem) []k8s.Problem { return out } -func (p *CacheProvider) WarningEvents(namespaces []string, since time.Duration) []*corev1.Event { - if p.cache.Events() == nil { - return nil - } - cutoff := time.Time{} - if since > 0 { - cutoff = time.Now().Add(-since) - } - collect := func(ns string) []*corev1.Event { - var lst []*corev1.Event - var err error - if ns == "" { - lst, err = p.cache.Events().List(labels.Everything()) - } else { - lst, err = p.cache.Events().Events(ns).List(labels.Everything()) - } - if err != nil { - return nil - } - out := make([]*corev1.Event, 0, len(lst)) - for _, e := range lst { - if e.Type != corev1.EventTypeWarning { - continue - } - if !cutoff.IsZero() { - last := e.LastTimestamp.Time - if last.IsZero() { - last = e.EventTime.Time - } - if last.Before(cutoff) { - continue - } - } - out = append(out, e) - } - return out - } - if len(namespaces) == 0 { - return collect("") - } - var merged []*corev1.Event - for _, ns := range namespaces { - merged = append(merged, collect(ns)...) 
- } - return merged -} - func (p *CacheProvider) WatchedDynamic() []schema.GroupVersionResource { if p.dynamic == nil { return nil @@ -163,21 +131,6 @@ func (p *CacheProvider) ListDynamic(gvr schema.GroupVersionResource, namespace s return p.dynamic.List(gvr, namespace) } -func (p *CacheProvider) KyvernoFindings() []policyreports.SubjectFindings { - idx := k8s.GetPolicyReportIndex() - if idx == nil { - return nil - } - return idx.All() -} - -// KyvernoStatus is a thin string-typed wrapper around k8s.GetKyvernoStatus -// so the issues package doesn't need to depend on the k8s package for the -// enum. Values are the constants documented on k8s.KyvernoStatus. -func (p *CacheProvider) KyvernoStatus() string { - return string(k8s.GetKyvernoStatus()) -} - func (p *CacheProvider) KindForGVR(gvr schema.GroupVersionResource) string { if p.discovery == nil { return "" diff --git a/internal/issues/types.go b/internal/issues/types.go index b08d4e1fc..9b3d7998a 100644 --- a/internal/issues/types.go +++ b/internal/issues/types.go @@ -4,19 +4,22 @@ // (failing Deployments, NotReady Nodes, pending PVCs…) // - missing_ref — direct by-name references to objects that do not exist // (missing PVCs, ConfigMaps, Secrets, backend Services, roleRefs…) +// - scheduling — why a Pod can't run: unschedulable (arch/taint/resources/ +// affinity, with the offending node label named), rejected at admission +// (quota/LimitRange/PodSecurity/webhook — no Pod is even created), or +// stuck post-bind (CNI IP exhaustion, volume attach/mount) // - condition — generic CRD .status.conditions[].status=False fallback // (Argo/Flux/Knative/Crossplane/cert-manager/KEDA) -// - event — recent K8s Warning events (opt-in; noisy) -// - kyverno — PolicyReport findings (opt-in) // -// All five describe LIVE OPERATIONAL STATE — "what is failing right -// now". 
Static best-practice/posture findings (runAsRoot, missing -// probes, no PDB, deprecated APIs, …) are a separate axis and live -// in pkg/audit + /api/audit + MCP get_cluster_audit. The two are NOT -// composed here: a healthy pod can have many audit findings, a -// crashing pod can have zero. Combining them would force consumers -// to disambiguate "is this critical operational or critical posture?" -// at every callsite. +// All four describe LIVE OPERATIONAL STATE — "what is failing right +// now". Two adjacent signals are deliberately NOT composed here, each +// with its own home: raw K8s Warning events (get_events + the timeline) +// and policy/posture — Kyverno PolicyReports + static best-practice +// findings (runAsRoot, missing probes, no PDB, deprecated APIs, …) which +// live in pkg/audit + /api/audit + MCP get_cluster_audit. A healthy pod +// can have many audit findings, a crashing pod can have zero. Combining +// them would force consumers to disambiguate "is this critical +// operational or critical posture?" at every callsite. // // The Issue type is what /api/issues and the hub's fleet_issues MCP // tool emit. Severity is normalized to a 3-tier vocabulary @@ -36,8 +39,8 @@ type CELFilter = filter.Filter // Severity is the normalized 3-tier severity. Mapping rules: // -// critical = problem.critical | kyverno.fail|error -// warning = problem. | event.Warning | CRD-condition False | kyverno.warn +// critical = problem.critical +// warning = problem. | CRD-condition False // info = reserved (currently unused) // // problem severities other than "critical" all collapse to warning — see @@ -50,17 +53,17 @@ const ( SeverityWarning Severity = "warning" ) -// Source records which underlying detection channel emitted this -// issue. Useful for filtering ("only show me problems, not events") -// and for SPA copy that explains why a row appeared. +// Source records which underlying detection channel emitted this issue. 
+// It is an OUTPUT label (for SPA copy that explains why a row appeared, +// and as a CEL filter binding), not an input filter — issues composes all +// four sources unconditionally; detection provenance is not a triage axis. type Source string const ( SourceProblem Source = "problem" // radar's hardcoded per-kind detection SourceMissingRef Source = "missing_ref" // dangling-ref detection (Pod→missing PVC/CM/Secret/SA, HPA→missing target, Ingress→missing backend, etc.) - SourceEvent Source = "event" // K8s Warning events (recent) + SourceScheduling Source = "scheduling" // placement/admission/post-bind failures (unschedulable, quota/PodSecurity/webhook, CNI/volume) SourceCondition Source = "condition" // generic CRD .status.conditions[].status=False fallback - SourceKyverno Source = "kyverno" // Kyverno PolicyReport findings (opt-in) ) // Ref is a lightweight resource reference, used for owner pointers. @@ -72,10 +75,10 @@ type Ref struct { // Issue is the unified cluster-health record. // -// FirstSeen / LastSeen / Count are populated for events (which arrive -// pre-aggregated from the K8s API). For problems, conditions, and -// Kyverno findings, FirstSeen and LastSeen are both the snapshot time -// and Count = 1. +// All current sources are snapshot-derived with Count = 1. For problem / +// missing_ref / scheduling, LastSeen is the compose time and FirstSeen backs +// off by the observed problem duration; for condition rows, both timestamps +// are the condition's lastTransitionTime. type Issue struct { Severity Severity `json:"severity"` Source Source `json:"source"` @@ -107,33 +110,13 @@ type Issue struct { type Filters struct { Namespaces []string Severities []Severity - Sources []Source Kinds []string - // Since restricts event-source issues to this lookback window. - // Other sources are always current-snapshot, so this only affects - // SourceEvent. Zero means "no time restriction" (all cached events). 
- Since time.Duration // Limit caps the returned slice. Zero means default (200). Limit int - // IncludeEvents defaults to false. Warning events are the noisiest - // source by an order of magnitude — a single broken Pod emits a - // FailedScheduling / BackOff / etc. Event every few seconds, and - // the event informer retains them for the cache window (default 1h+). - // On a multi-thousand-Pod cluster this floods the Issue list with - // rows that mostly duplicate `problem` source (a CrashLoopBackOff - // Pod already shows up under SourceProblem). Treat events as opt-in; - // when enabled the caller should also pass a Since window (handler - // defaults to 1h when events are on and Since is zero). - IncludeEvents bool - // IncludeKyverno defaults to false. Kyverno PolicyReport findings - // are loud (a baseline cluster-pss profile alone emits 10+ findings - // per workload) and the default Issue view should not be dominated - // by best-practice/policy noise. Opt in via include_kyverno=true - // or by passing "kyverno" in the source list. - IncludeKyverno bool // Filter is an optional compiled CEL predicate evaluated against - // each composed Issue's row bindings. Compile happens in the - // handler (and is cached); this layer just runs the program. + // each composed Issue's row bindings (source is exposed there, so a + // power user can still slice by detection method). Compile happens in + // the handler (and is cached); this layer just runs the program. Filter *CELFilter // CanReadClusterScoped authorizes cluster-scoped Issue rows before // they are returned. 
Handlers provide a per-user SAR-backed predicate; diff --git a/internal/k8s/cache.go b/internal/k8s/cache.go index 48cc5d288..ac4dc8493 100644 --- a/internal/k8s/cache.go +++ b/internal/k8s/cache.go @@ -69,6 +69,7 @@ var deferredResources = map[string]bool{ "horizontalpodautoscalers": true, // problems detection, not critical for first render "serviceaccounts": true, // audit inheritance lookups, not first-render "limitranges": true, // audit inheritance lookups, not first-render + "resourcequotas": true, // scheduling/admission diagnostics, not first-render } // minimalFirstPaintSet is the subset of critical informers the home diff --git a/internal/k8s/capabilities.go b/internal/k8s/capabilities.go index 989fa5945..3a73038da 100644 --- a/internal/k8s/capabilities.go +++ b/internal/k8s/capabilities.go @@ -48,6 +48,7 @@ type ResourcePermissions struct { RoleBindings bool `json:"roleBindings"` ClusterRoleBindings bool `json:"clusterRoleBindings"` LimitRanges bool `json:"limitRanges"` + ResourceQuotas bool `json:"resourceQuotas"` Gateways bool `json:"gateways"` HTTPRoutes bool `json:"httpRoutes"` VerticalPodAutoscalers bool `json:"verticalPodAutoscalers"` @@ -582,6 +583,7 @@ func resourceProbeTargets(perms *ResourcePermissions) []resourceProbe { {key: k8score.PersistentVolumeClaims, gvr: schema.GroupVersionResource{Version: "v1", Resource: "persistentvolumeclaims"}, field: &perms.PersistentVolumeClaims}, {key: k8score.ServiceAccounts, gvr: schema.GroupVersionResource{Version: "v1", Resource: "serviceaccounts"}, field: &perms.ServiceAccounts}, {key: k8score.LimitRanges, gvr: schema.GroupVersionResource{Version: "v1", Resource: "limitranges"}, field: &perms.LimitRanges}, + {key: k8score.ResourceQuotas, gvr: schema.GroupVersionResource{Version: "v1", Resource: "resourcequotas"}, field: &perms.ResourceQuotas}, {key: k8score.Nodes, gvr: schema.GroupVersionResource{Version: "v1", Resource: "nodes"}, clusterOnly: true, field: &perms.Nodes}, {key: k8score.Namespaces, gvr: 
schema.GroupVersionResource{Version: "v1", Resource: "namespaces"}, clusterOnly: true, field: &perms.Namespaces}, {key: k8score.PersistentVolumes, gvr: schema.GroupVersionResource{Version: "v1", Resource: "persistentvolumes"}, clusterOnly: true, field: &perms.PersistentVolumes}, diff --git a/internal/k8s/fetch.go b/internal/k8s/fetch.go index eb865cad9..69a6e9ef5 100644 --- a/internal/k8s/fetch.go +++ b/internal/k8s/fetch.go @@ -448,6 +448,26 @@ func FetchResourceList(cache *ResourceCache, kind string, namespaces []string) ( return ToRuntimeObjects(items), nil }, ) + case "resourcequotas", "resourcequota": + if cache.ResourceQuotas() == nil { + return nil, fmt.Errorf("forbidden: resourcequotas") + } + return listPerNs( + func() ([]runtime.Object, error) { + items, err := cache.ResourceQuotas().List(labels.Everything()) + if err != nil { + return nil, err + } + return ToRuntimeObjects(items), nil + }, + func(ns string) ([]runtime.Object, error) { + items, err := cache.ResourceQuotas().ResourceQuotas(ns).List(labels.Everything()) + if err != nil { + return nil, err + } + return ToRuntimeObjects(items), nil + }, + ) case "roles", "role": if cache.Roles() == nil { return nil, fmt.Errorf("forbidden: roles") @@ -625,6 +645,11 @@ func FetchResource(cache *ResourceCache, kind, namespace, name string) (runtime. 
return nil, fmt.Errorf("forbidden: limitranges") } return cache.LimitRanges().LimitRanges(namespace).Get(name) + case "resourcequotas", "resourcequota": + if cache.ResourceQuotas() == nil { + return nil, fmt.Errorf("forbidden: resourcequotas") + } + return cache.ResourceQuotas().ResourceQuotas(namespace).Get(name) case "roles", "role": if cache.Roles() == nil { return nil, fmt.Errorf("forbidden: roles") @@ -720,6 +745,9 @@ func SetTypeMeta(resource any) { case *corev1.LimitRange: r.APIVersion = "v1" r.Kind = "LimitRange" + case *corev1.ResourceQuota: + r.APIVersion = "v1" + r.Kind = "ResourceQuota" case *rbacv1.Role: r.APIVersion = "rbac.authorization.k8s.io/v1" r.Kind = "Role" diff --git a/internal/k8s/policy_reports_testhooks.go b/internal/k8s/policy_reports_testhooks.go deleted file mode 100644 index e51bfc363..000000000 --- a/internal/k8s/policy_reports_testhooks.go +++ /dev/null @@ -1,76 +0,0 @@ -package k8s - -import ( - "fmt" - - "github.com/skyhook-io/radar/pkg/policyreports" -) - -// Test hooks for cross-package tests that need to inject lifecycle state -// into the PolicyReport index without running real warmup (which requires -// discovery + dynamic cache singletons). These are deliberately exported -// (capitalized "ForTest" suffix) so they can be called from -// internal/server's _test.go files. -// -// Naming: "ForTest" suffix is the convention used elsewhere in this -// codebase (e.g. timeline.ResetStore, k8s.InitTestResourceCache); it -// keeps them grep-able and unambiguously not part of the runtime surface. -// -// We don't gate these behind a `testing` build tag because Go doesn't -// support such a tag and the alternative (e.g. //go:build !prod) is -// noisy. The functions cost nothing at runtime (no init wiring) and -// callers don't accidentally invoke them — the names make the intent -// obvious. - -// LoadKyvernoDecisionForTest reads the current warmup decision atomic. 
-// Empty string means "no decision recorded yet" (the implicit warmup -// state). -func LoadKyvernoDecisionForTest() KyvernoStatus { - v, _ := kyvernoWarmupDecision.Load().(KyvernoStatus) - return v -} - -// StoreKyvernoDecisionForTest sets the warmup decision atomic. Use -// KyvernoStatus("") to clear it (re-arms the implicit "warmup" state). -func StoreKyvernoDecisionForTest(s KyvernoStatus) { - kyvernoWarmupDecision.Store(s) -} - -// LoadKyvernoIndexForTest returns the current PolicyReport index pointer -// (typed as `any` so callers don't need to import pkg/policyreports just -// to round-trip the value through cleanup). -func LoadKyvernoIndexForTest() any { - idx := policyReportIndex.Load() - if idx == nil { - // Return untyped nil so the caller's `if idx == nil` works - // without unwrapping a typed-nil interface. - return nil - } - return idx -} - -// StoreKyvernoIndexForTest sets the PolicyReport index pointer. Pass nil -// to clear (e.g. for not_installed / deferred / warmup states); pass the -// result of NewEmptyKyvernoIndexForTest for the ready state. -func StoreKyvernoIndexForTest(v any) { - if v == nil { - policyReportIndex.Store(nil) - return - } - idx, ok := v.(*policyreports.Index) - if !ok { - // Test-only hook: a wrong type here is a test bug, not a runtime - // condition to handle gracefully. Panic immediately so the test - // fails at the misuse site instead of producing confusing - // downstream failures. - panic(fmt.Sprintf("StoreKyvernoIndexForTest: want *policyreports.Index, got %T", v)) - } - policyReportIndex.Store(idx) -} - -// NewEmptyKyvernoIndexForTest returns a fresh empty index instance. -// Useful for simulating the "ready but no findings yet" state when -// testing handler behavior — the index exists, but All() returns nil. 
-func NewEmptyKyvernoIndexForTest() any { - return policyreports.NewIndex() -} diff --git a/internal/k8s/problems.go b/internal/k8s/problems.go index d2aa072b7..06ec683e7 100644 --- a/internal/k8s/problems.go +++ b/internal/k8s/problems.go @@ -170,6 +170,11 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem { if health == "healthy" { continue } + // Unschedulable pods are owned by the scheduling source, which + // names the offending constraint instead of a bare "Pending". + if IsPodUnschedulable(pod) { + continue + } ageDur := now.Sub(pod.CreationTimestamp.Time) severity := "high" if health == "error" { diff --git a/internal/k8s/scheduling.go b/internal/k8s/scheduling.go new file mode 100644 index 000000000..1a333b9f1 --- /dev/null +++ b/internal/k8s/scheduling.go @@ -0,0 +1,982 @@ +package k8s + +import ( + "fmt" + "regexp" + "slices" + "sort" + "strconv" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" +) + +// Scheduling failure decomposition. +// +// The kube-scheduler already did the root-cause analysis — it just hands +// it back as one opaque string in the FailedScheduling event and the +// Pod's PodScheduled=False condition message, e.g.: +// +// 0/5 nodes are available: 2 Insufficient cpu, 3 node(s) had untolerated +// taint {dedicated: gpu}. preemption: 0/5 nodes are available: 5 No +// preemption victims found for incoming pod. +// +// parseSchedulerMessage turns that into structured, per-predicate reasons +// so callers (the issues engine, MCP diagnose, the Pod UI banner) can show +// "why won't this schedule" without the operator re-reading scheduler prose. +// It is a pure function — the node-fit resolver (resolveUnsatisfiableNodeSelector) +// later joins NodeAffinitySelector reasons against the live node cache to name +// the specific offending label (e.g. "no node has kubernetes.io/arch=arm64"). 
+// Taint key/value come straight from the scheduler message (parseTaintPayload), +// not from a cache join. + +// SchedReasonClass is the predicate family a scheduling failure falls into. +type SchedReasonClass string + +const ( + SchedInsufficientResource SchedReasonClass = "InsufficientResource" + SchedUntoleratedTaint SchedReasonClass = "UntoleratedTaint" + SchedNodeAffinitySelector SchedReasonClass = "NodeAffinitySelector" + SchedPodAffinity SchedReasonClass = "PodAffinity" + SchedPodAntiAffinity SchedReasonClass = "PodAntiAffinity" + SchedTopologySpread SchedReasonClass = "TopologySpread" + SchedVolumeNodeAffinity SchedReasonClass = "VolumeNodeAffinity" + SchedVolumeBinding SchedReasonClass = "VolumeBinding" // unbound PVC / no available PVs to bind + SchedVolumeCount SchedReasonClass = "VolumeCount" + SchedNoPorts SchedReasonClass = "NoPorts" + SchedNodeUnschedulable SchedReasonClass = "NodeUnschedulable" // cordoned / not-ready / unschedulable taint + SchedOther SchedReasonClass = "Other" +) + +// SchedulingReason is one decomposed clause of a scheduler verdict. The +// side fields are populated only for their owning Class (Resource for +// SchedInsufficientResource; TaintKey/TaintValue for SchedUntoleratedTaint); +// other classes leave them zero. classifyClause is the sole producer and +// always sets Class + Raw. +type SchedulingReason struct { + Class SchedReasonClass + // NodeCount is how many nodes this clause rejected. 0 when the clause + // is whole-message (e.g. unbound PVC) or the count couldn't be parsed. + NodeCount int + // Resource is set for SchedInsufficientResource: "cpu", "memory", + // "ephemeral-storage", "pods", "nvidia.com/gpu", … + Resource string + // TaintKey / TaintValue are set for SchedUntoleratedTaint. TaintValue + // is empty for valueless taints (e.g. {node.kubernetes.io/unreachable}). 
+ TaintKey string + TaintValue string + // Raw is the original clause text, preserved so callers can fall back + // to it for classes we don't further structure. + Raw string +} + +var ( + // "0/5 nodes are available" / "1/12 nodes are available" + reNodesAvailable = regexp.MustCompile(`(\d+)/(\d+)\s+nodes? are available`) + // leading integer count on a clause: "2 Insufficient cpu", "3 node(s) had…" + reLeadingCount = regexp.MustCompile(`^\s*(\d+)\s+`) + // "Insufficient " — resource may contain '.'/'-'/'/' + reInsufficient = regexp.MustCompile(`Insufficient\s+([A-Za-z0-9./_-]+)`) + // taint payload: "{key: value}" or "{key}" + reTaint = regexp.MustCompile(`\{([^}]*)\}`) +) + +// parseSchedulerMessage decomposes a scheduler verdict (from a +// FailedScheduling event message or a PodScheduled=False condition message) +// into structured reasons. totalNodes is the node count the scheduler +// considered (the denominator of "0/N nodes are available"); 0 when the +// message carries no such prefix. An empty/unrecognized message yields nil +// reasons so callers can fall back to the raw text. +func parseSchedulerMessage(msg string) (totalNodes int, reasons []SchedulingReason) { + msg = strings.TrimSpace(msg) + if msg == "" { + return 0, nil + } + + // Drop the "preemption: …" tail — it restates the same node set from + // the preemption scheduler's point of view and only adds noise. + if before, _, ok := strings.Cut(msg, ". preemption:"); ok { + msg = before + } else if before, _, ok := strings.Cut(msg, " preemption:"); ok { + msg = before + } + + if m := reNodesAvailable.FindStringSubmatch(msg); m != nil { + totalNodes, _ = strconv.Atoi(m[2]) + } + + // Everything after the first ":" is the comma-separated clause list. + // Messages without a colon (e.g. "pod has unbound immediate + // PersistentVolumeClaims") are treated as a single clause. 
+ clauseStr := msg + if _, rest, ok := strings.Cut(msg, ":"); ok { + clauseStr = rest + } + clauseStr = strings.TrimRight(strings.TrimSpace(clauseStr), ".") + if clauseStr == "" { + return totalNodes, nil + } + + for clause := range strings.SplitSeq(clauseStr, ", ") { + clause = strings.TrimSpace(clause) + if clause == "" { + continue + } + if r, ok := classifyClause(clause); ok { + reasons = append(reasons, r) + } + } + return totalNodes, reasons +} + +// classifyClause maps one scheduler clause to a structured reason. The +// substring checks are ordered so the more specific phrasings win (e.g. +// "anti-affinity" before "affinity", "node affinity/selector" before the +// bare "affinity" used by pod-affinity). +func classifyClause(clause string) (SchedulingReason, bool) { + r := SchedulingReason{Raw: clause} + if m := reLeadingCount.FindStringSubmatch(clause); m != nil { + r.NodeCount, _ = strconv.Atoi(m[1]) + } + + lower := strings.ToLower(clause) + + switch { + case strings.Contains(clause, "Insufficient"): + r.Class = SchedInsufficientResource + if m := reInsufficient.FindStringSubmatch(clause); m != nil { + r.Resource = m[1] + } + case strings.Contains(lower, "too many pods"): + r.Class = SchedInsufficientResource + r.Resource = "pods" + case strings.Contains(lower, "untolerated taint"): + r.Class = SchedUntoleratedTaint + r.TaintKey, r.TaintValue = parseTaintPayload(clause) + // A cordon / not-ready taint is really a node-availability problem, + // not a pod-misconfiguration; classify it as such so the UI doesn't + // tell the user to "add a toleration" for node.kubernetes.io/*. + if isNodeLifecycleTaint(r.TaintKey) { + r.Class = SchedNodeUnschedulable + } + case strings.Contains(lower, "volume node affinity"): + // Must precede the bare "node affinity" check below — this clause + // contains the substring "node affinity" but is a volume-topology + // failure, not a pod node-affinity mismatch. 
+ r.Class = SchedVolumeNodeAffinity + case strings.Contains(lower, "anti-affinity"): + r.Class = SchedPodAntiAffinity + case strings.Contains(lower, "node affinity") || strings.Contains(lower, "node selector"): + r.Class = SchedNodeAffinitySelector + case strings.Contains(lower, "pod affinity"): + r.Class = SchedPodAffinity + case strings.Contains(lower, "topology spread"): + r.Class = SchedTopologySpread + case strings.Contains(lower, "max volume count"): + r.Class = SchedVolumeCount + case strings.Contains(lower, "free ports"): + r.Class = SchedNoPorts + case strings.Contains(lower, "unbound") && strings.Contains(lower, "persistentvolumeclaim"), + strings.Contains(lower, "persistent volumes to bind"): + r.Class = SchedVolumeBinding + case strings.Contains(lower, "unschedulable"), strings.Contains(lower, "were not ready"): + r.Class = SchedNodeUnschedulable + default: + r.Class = SchedOther + } + return r, true +} + +// parseTaintPayload extracts key/value from an "untolerated taint {k: v}" +// or "{k}" clause. Returns empty strings if no {…} payload is present. +func parseTaintPayload(clause string) (key, value string) { + m := reTaint.FindStringSubmatch(clause) + if m == nil { + return "", "" + } + inner := strings.TrimSpace(m[1]) + if inner == "" { + return "", "" + } + if k, v, ok := strings.Cut(inner, ":"); ok { + return strings.TrimSpace(k), strings.TrimSpace(v) + } + return inner, "" +} + +// isNodeLifecycleTaint reports whether a taint key is one the control plane +// sets to mark a node temporarily unusable (cordon, not-ready, pressure), +// as opposed to an operator-applied dedicated/workload taint. 
+func isNodeLifecycleTaint(key string) bool { + return strings.HasPrefix(key, "node.kubernetes.io/") || + strings.HasPrefix(key, "node-role.kubernetes.io/") || + strings.HasPrefix(key, "node.cloudprovider.kubernetes.io/") +} + +// ---- Node-fit resolution ------------------------------------------------ +// +// The scheduler reports "N node(s) didn't match Pod's node affinity/selector" +// without naming WHICH label is unsatisfiable. resolveUnsatisfiableNodeSelector +// joins the pod's nodeSelector + required nodeAffinity against the fleet's +// node labels to name the specific offending key — turning the opaque verdict +// into "no node has kubernetes.io/arch=arm64 (6 nodes are amd64)". This is the +// step that makes arch/os/zone/instance-type mismatches self-explanatory. +// +// These functions are pure (operate on plain NodeFacts / PodPlacement); the +// detector populates them from the live node cache. + +// NodeFacts is the minimal per-node view the fit resolver needs. +type NodeFacts struct { + Name string + Labels map[string]string +} + +// MatchExpr is a node-affinity match expression (key, operator, values). +type MatchExpr struct { + Key string + Operator string // In, NotIn, Exists, DoesNotExist, Gt, Lt + Values []string +} + +// NodeSelectorTermFacts is one required nodeAffinity term — a node satisfies +// the term if it matches ALL of the term's expressions. +type NodeSelectorTermFacts struct { + Expressions []MatchExpr +} + +// PodPlacement is the pod's scheduling constraints, extracted from its spec. +type PodPlacement struct { + NodeSelector map[string]string + // RequiredNodeAffinity is the flattened requiredDuringScheduling terms. + // A node satisfies the affinity if it matches ANY term. + RequiredNodeAffinity []NodeSelectorTermFacts +} + +// resolveUnsatisfiableNodeSelector returns human-readable explanations of +// which label requirement no node satisfies, naming the offending key(s) +// and the values the fleet actually carries. 
Empty slice means the pod's +// label constraints are individually satisfiable (so the placement failure +// lies elsewhere — taints, resources, a term combination). +func resolveUnsatisfiableNodeSelector(p PodPlacement, nodes []NodeFacts) []string { + var out []string + + for _, k := range sortedKeys(p.NodeSelector) { + v := p.NodeSelector[k] + if countNodesWithLabel(nodes, k, v) == 0 { + out = append(out, explainMissingLabel(k, v, nodes)) + } + } + + if len(p.RequiredNodeAffinity) > 0 && !anyTermMatches(p.RequiredNodeAffinity, nodes) { + seen := map[string]bool{} + var affinityMsgs []string + for _, term := range p.RequiredNodeAffinity { + for _, e := range term.Expressions { + if countNodesMatchingExpr(nodes, e) == 0 { + msg := explainMissingExpr(e, nodes) + if !seen[msg] { + seen[msg] = true + affinityMsgs = append(affinityMsgs, msg) + } + } + } + } + if len(affinityMsgs) == 0 { + // Every expression is individually satisfiable but no single + // node satisfies a whole term — a constraint combination. + affinityMsgs = append(affinityMsgs, "no node satisfies the pod's required nodeAffinity term combination") + } + out = append(out, affinityMsgs...) 
+ } + + return out +} + +func explainMissingLabel(key, val string, nodes []NodeFacts) string { + present := distinctLabelValues(nodes, key) + if len(present) == 0 { + return fmt.Sprintf("no node carries label %s (pod requires %s=%s)", key, key, val) + } + return fmt.Sprintf("no node has %s=%s — %d node(s) carry %s: [%s]", + key, val, countNodesWithLabelKey(nodes, key), key, strings.Join(present, ", ")) +} + +func explainMissingExpr(e MatchExpr, nodes []NodeFacts) string { + present := distinctLabelValues(nodes, e.Key) + switch e.Operator { + case "In": + if len(present) == 0 { + return fmt.Sprintf("no node carries label %s (pod requires %s in [%s])", e.Key, e.Key, strings.Join(e.Values, ", ")) + } + return fmt.Sprintf("no node has %s in [%s] — fleet %s: [%s]", e.Key, strings.Join(e.Values, ", "), e.Key, strings.Join(present, ", ")) + case "Exists": + return fmt.Sprintf("no node carries label %s (pod requires it to exist)", e.Key) + case "DoesNotExist": + return fmt.Sprintf("every node carries label %s (pod requires it absent)", e.Key) + case "NotIn": + return fmt.Sprintf("every node has %s in [%s] (pod requires otherwise)", e.Key, strings.Join(e.Values, ", ")) + default: + return fmt.Sprintf("no node satisfies nodeAffinity %s %s [%s]", e.Key, e.Operator, strings.Join(e.Values, ", ")) + } +} + +func anyTermMatches(terms []NodeSelectorTermFacts, nodes []NodeFacts) bool { + for _, n := range nodes { + for _, term := range terms { + if nodeMatchesTerm(n, term) { + return true + } + } + } + return false +} + +func nodeMatchesTerm(n NodeFacts, term NodeSelectorTermFacts) bool { + for _, e := range term.Expressions { + if !nodeMatchesExpr(n, e) { + return false + } + } + return true +} + +func nodeMatchesExpr(n NodeFacts, e MatchExpr) bool { + v, ok := n.Labels[e.Key] + switch e.Operator { + case "In": + return ok && slices.Contains(e.Values, v) + case "NotIn": + return !ok || !slices.Contains(e.Values, v) + case "Exists": + return ok + case "DoesNotExist": + return !ok + 
case "Gt", "Lt": + if !ok || len(e.Values) == 0 { + return false + } + nv, err1 := strconv.ParseInt(v, 10, 64) + bound, err2 := strconv.ParseInt(e.Values[0], 10, 64) + if err1 != nil || err2 != nil { + return false + } + if e.Operator == "Gt" { + return nv > bound + } + return nv < bound + default: + return false + } +} + +func countNodesMatchingExpr(nodes []NodeFacts, e MatchExpr) int { + n := 0 + for _, node := range nodes { + if nodeMatchesExpr(node, e) { + n++ + } + } + return n +} + +func countNodesWithLabel(nodes []NodeFacts, key, val string) int { + n := 0 + for _, node := range nodes { + if node.Labels[key] == val { + n++ + } + } + return n +} + +func countNodesWithLabelKey(nodes []NodeFacts, key string) int { + n := 0 + for _, node := range nodes { + if _, ok := node.Labels[key]; ok { + n++ + } + } + return n +} + +func distinctLabelValues(nodes []NodeFacts, key string) []string { + seen := map[string]bool{} + var out []string + for _, node := range nodes { + if v, ok := node.Labels[key]; ok && !seen[v] { + seen[v] = true + out = append(out, v) + } + } + sort.Strings(out) + return out +} + +func sortedKeys(m map[string]string) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + sort.Strings(out) + return out +} + +// ---- Bind-time detection ------------------------------------------------ + +// DetectSchedulingProblems flags Pending pods the scheduler tried to place +// and rejected (PodScheduled=False). It reads the scheduler's own verdict +// from the condition message — current state, one row per pod, no event +// noise — decomposes it, and resolves node-affinity/selector misses against +// the live node cache so the Message names the specific offending constraint +// (arch/zone/taint/resources) instead of just "Pending". namespace="" scans +// all namespaces. Post-bind (ContainerCreating/CNI/volume) and admission +// (quota with no Pod) failures are handled by separate detectors. 
+func DetectSchedulingProblems(cache *ResourceCache, namespace string) []Problem { + if cache == nil { + return nil + } + var problems []Problem + now := time.Now() + nodes := schedulingNodeFacts(cache) + + for _, pods := range listPodsByNamespace(cache, namespace) { + for _, pod := range pods { + if pod.Status.Phase != corev1.PodPending { + continue + } + cond := podScheduledCondition(pod) + // PodScheduled=False with reason=Unschedulable is the scheduler's + // definitive "I tried and couldn't place this" — present only after + // a real scheduling attempt, so no age grace is needed. reason= + // SchedulingGated is NOT a failure: the scheduler hasn't tried yet + // because the pod carries scheduling gates (a controller will lift + // them), so it must not surface as unschedulable. + if cond == nil || cond.Status != corev1.ConditionFalse || cond.Reason != corev1.PodReasonUnschedulable { + continue + } + ageDur := now.Sub(pod.CreationTimestamp.Time) + dur := ageDur + if !cond.LastTransitionTime.IsZero() { + dur = now.Sub(cond.LastTransitionTime.Time) + } + problems = append(problems, Problem{ + Kind: "Pod", + Namespace: pod.Namespace, + Name: pod.Name, + Severity: schedulingSeverity(dur), + Reason: "Unschedulable", + Message: describeUnschedulable(pod, cond.Message, nodes), + Age: FormatAge(ageDur), + AgeSeconds: int64(ageDur.Seconds()), + Duration: FormatAge(dur), + DurationSeconds: int64(dur.Seconds()), + }) + } + } + return problems +} + +func podScheduledCondition(pod *corev1.Pod) *corev1.PodCondition { + for i := range pod.Status.Conditions { + if pod.Status.Conditions[i].Type == corev1.PodScheduled { + return &pod.Status.Conditions[i] + } + } + return nil +} + +// IsPodUnschedulable reports whether the scheduler tried and failed to place +// the pod (PodScheduled=False). Such pods are owned by SourceScheduling, +// which explains WHY — the generic problem detector skips them to avoid a +// duplicate bare "Pending" row. 
+func IsPodUnschedulable(pod *corev1.Pod) bool { + c := podScheduledCondition(pod) + // Only reason=Unschedulable counts; reason=SchedulingGated is an + // intentional not-yet-scheduled state, not a placement failure. + return c != nil && c.Status == corev1.ConditionFalse && c.Reason == corev1.PodReasonUnschedulable +} + +// schedulingSeverity ramps with how long the pod has been unschedulable: a +// momentary miss right after creation is usually transient; one stuck for +// many minutes is a real, operator-actionable failure. +func schedulingSeverity(d time.Duration) string { + switch { + case d >= 10*time.Minute: + return "critical" + case d >= 2*time.Minute: + return "high" + default: + return "medium" + } +} + +// describeUnschedulable builds the operator-facing message: lead with the +// resolved offending constraint (the value the bare scheduler verdict hides) +// when we can name it, then summarize the scheduler's per-predicate counts. +// Pure over its inputs (pod spec + verdict string + node facts). +func describeUnschedulable(pod *corev1.Pod, schedMsg string, nodes []NodeFacts) string { + total, reasons := parseSchedulerMessage(schedMsg) + + var parts []string + resolvedAffinity := false + for _, r := range reasons { + if r.Class == SchedNodeAffinitySelector { + if resolved := resolveUnsatisfiableNodeSelector(extractPodPlacement(pod), nodes); len(resolved) > 0 { + parts = append(parts, resolved...) + resolvedAffinity = true + } + break + } + } + if summary := summarizeReasons(reasons, resolvedAffinity); summary != "" { + parts = append(parts, summary) + } + if len(parts) == 0 { + if msg := strings.TrimSpace(schedMsg); msg != "" { + return msg + } + return "Pod is unschedulable" + } + msg := strings.Join(parts, "; ") + if total > 0 { + msg = fmt.Sprintf("%s (0/%d nodes available)", msg, total) + } + return msg +} + +// summarizeReasons renders the parsed predicate counts into a compact phrase. 
+// When skipAffinity is set, the generic node-affinity/selector clause is +// omitted because describeUnschedulable already emitted the resolved label. +func summarizeReasons(reasons []SchedulingReason, skipAffinity bool) string { + var parts []string + for _, r := range reasons { + switch r.Class { + case SchedInsufficientResource: + res := r.Resource + if res == "" { + res = "resources" + } + parts = append(parts, fmt.Sprintf("%s insufficient %s", nodesPhrase(r.NodeCount), res)) + case SchedUntoleratedTaint: + t := r.TaintKey + if r.TaintValue != "" { + t += "=" + r.TaintValue + } + parts = append(parts, fmt.Sprintf("%s untolerated taint %s", nodesPhrase(r.NodeCount), t)) + case SchedNodeAffinitySelector: + if skipAffinity { + continue + } + parts = append(parts, fmt.Sprintf("%s node affinity/selector mismatch", nodesPhrase(r.NodeCount))) + case SchedPodAffinity: + parts = append(parts, fmt.Sprintf("%s pod affinity unmet", nodesPhrase(r.NodeCount))) + case SchedPodAntiAffinity: + parts = append(parts, fmt.Sprintf("%s pod anti-affinity conflict", nodesPhrase(r.NodeCount))) + case SchedTopologySpread: + parts = append(parts, fmt.Sprintf("%s topology-spread unmet", nodesPhrase(r.NodeCount))) + case SchedVolumeNodeAffinity: + parts = append(parts, fmt.Sprintf("%s volume node-affinity conflict", nodesPhrase(r.NodeCount))) + case SchedVolumeBinding: + parts = append(parts, "unbound PersistentVolumeClaim") + case SchedVolumeCount: + parts = append(parts, fmt.Sprintf("%s at max volume count", nodesPhrase(r.NodeCount))) + case SchedNoPorts: + parts = append(parts, fmt.Sprintf("%s no free host ports", nodesPhrase(r.NodeCount))) + case SchedNodeUnschedulable: + parts = append(parts, fmt.Sprintf("%s cordoned/not-ready", nodesPhrase(r.NodeCount))) + default: + if r.Raw != "" { + parts = append(parts, r.Raw) + } + } + } + return strings.Join(parts, ", ") +} + +func nodesPhrase(n int) string { + if n <= 0 { + return "node(s)" + } + return fmt.Sprintf("%d node(s)", n) +} + +// 
extractPodPlacement pulls the pod's node-targeting constraints (nodeSelector +// + required nodeAffinity matchExpressions) into the resolver's plain shape. +func extractPodPlacement(pod *corev1.Pod) PodPlacement { + p := PodPlacement{NodeSelector: pod.Spec.NodeSelector} + if pod.Spec.Affinity == nil || pod.Spec.Affinity.NodeAffinity == nil { + return p + } + req := pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution + if req == nil { + return p + } + for _, term := range req.NodeSelectorTerms { + var t NodeSelectorTermFacts + for _, e := range term.MatchExpressions { + t.Expressions = append(t.Expressions, MatchExpr{ + Key: e.Key, + Operator: string(e.Operator), + Values: e.Values, + }) + } + if len(t.Expressions) > 0 { + p.RequiredNodeAffinity = append(p.RequiredNodeAffinity, t) + } + } + return p +} + +// schedulingNodeFacts snapshots the node cache into the resolver's plain +// NodeFacts shape (labels + taints + cordon state). +func schedulingNodeFacts(cache *ResourceCache) []NodeFacts { + lister := cache.Nodes() + if lister == nil { + return nil + } + nodeList, _ := lister.List(labels.Everything()) + facts := make([]NodeFacts, 0, len(nodeList)) + for _, n := range nodeList { + facts = append(facts, NodeFacts{Name: n.Name, Labels: n.Labels}) + } + return facts +} + +// ---- Admission-layer detection ------------------------------------------ +// +// The layer where NO pod is ever created: the controller's pod template is +// rejected at admission, so there's no Pod to inspect — the Deployment just +// sits at "Progressing". Detected reactively from controller FailedCreate +// events naming the workload blocked right now (exceeded quota / LimitRange / +// PodSecurity / webhook). Proactive "quota near/at limit" is deliberately NOT +// surfaced here — a saturated quota is namespace capacity context, not a live +// failure, and belongs in the Namespace quota view, not the issue stream. 
+ +// admissionFailureWindow bounds how recently a FailedCreate must have fired +// to count as "still happening" — a stuck controller re-emits continuously, +// so a fresh LastTimestamp means the failure is active. +const admissionFailureWindow = 30 * time.Minute + +// DetectAdmissionProblems flags pod-template rejections at admission time. +// namespace="" scans all namespaces. +func DetectAdmissionProblems(cache *ResourceCache, namespace string) []Problem { + if cache == nil { + return nil + } + return detectAdmissionFailures(cache, namespace) +} + +func detectAdmissionFailures(cache *ResourceCache, namespace string) []Problem { + if cache.Events() == nil { + return nil + } + var events []*corev1.Event + if namespace != "" { + events, _ = cache.Events().Events(namespace).List(labels.Everything()) + } else { + events, _ = cache.Events().List(labels.Everything()) + } + + now := time.Now() + // One row per blocked controller, showing the CURRENT blocker. A workload + // emits a FailedCreate per attempt (each with a different generated pod name + // → distinct cached events), and the active blocker can change within the + // window (quota cleared, webhook now rejects). Informer List order is + // arbitrary, so keep the LATEST event by LastTimestamp per object rather + // than whichever happened to be iterated first. 
+ type admCandidate struct { + ev *corev1.Event + reason string + } + latest := map[string]admCandidate{} + var order []string + for _, e := range events { + if e.Reason != "FailedCreate" { + continue + } + if t := eventLastTime(e); !t.IsZero() && now.Sub(t) > admissionFailureWindow { + continue // stale — the controller stopped retrying + } + reason, ok := classifyAdmissionFailure(e.Message) + if !ok { + continue + } + obj := e.InvolvedObject + // A blocked controller re-emits FailedCreate continuously, but a since- + // recovered one's event lingers for the whole window — cross-check + // current state so we don't flag a now-healthy workload as critical. + if !admissionTargetStillBlocked(cache, obj) { + continue + } + key := obj.Kind + "/" + obj.Namespace + "/" + obj.Name + if cur, exists := latest[key]; exists { + if eventLastTime(e).After(eventLastTime(cur.ev)) { + latest[key] = admCandidate{ev: e, reason: reason} + } + continue + } + latest[key] = admCandidate{ev: e, reason: reason} + order = append(order, key) + } + + problems := make([]Problem, 0, len(order)) + for _, key := range order { + c := latest[key] + obj := c.ev.InvolvedObject + ageDur := now.Sub(eventFirstTime(c.ev)) + problems = append(problems, Problem{ + Kind: obj.Kind, + Namespace: obj.Namespace, + Name: obj.Name, + Severity: "critical", + Reason: c.reason, + Message: "pod creation blocked: " + strings.TrimSpace(c.ev.Message), + Age: FormatAge(ageDur), + AgeSeconds: int64(ageDur.Seconds()), + Duration: FormatAge(ageDur), + DurationSeconds: int64(ageDur.Seconds()), + }) + } + return problems +} + +// eventLastTime / eventFirstTime return the most-recent / earliest timestamp on +// an Event, falling back to EventTime (events API v1) when the legacy +// First/LastTimestamp fields are unset. 
+func eventLastTime(e *corev1.Event) time.Time { + if !e.LastTimestamp.Time.IsZero() { + return e.LastTimestamp.Time + } + return e.EventTime.Time +} + +func eventFirstTime(e *corev1.Event) time.Time { + if !e.FirstTimestamp.Time.IsZero() { + return e.FirstTimestamp.Time + } + return e.EventTime.Time +} + +// admissionTargetStillBlocked reports whether the controller named by a +// FailedCreate event still has unmet replicas, i.e. the rejection is active. +// A recovered workload has its replicas, so its lingering event is skipped. +// Unknown kinds / not-found default to true — never drop genuine coverage. +func admissionTargetStillBlocked(cache *ResourceCache, obj corev1.ObjectReference) bool { + // "Blocked" means the controller still can't CREATE its pods — measured by + // created-count (Status.Replicas / CurrentNumberScheduled) below desired, + // NOT readiness. A workload whose pods were created but stay not-ready for + // another reason (e.g. unschedulable after a quota was raised) has its pods + // and is no longer admission-blocked. 
+ switch obj.Kind { + case "ReplicaSet": + if l := cache.ReplicaSets(); l != nil { + if rs, err := l.ReplicaSets(obj.Namespace).Get(obj.Name); err == nil { + return rs.Status.Replicas < schedDesiredReplicas(rs.Spec.Replicas) + } + } + case "Deployment": + if l := cache.Deployments(); l != nil { + if d, err := l.Deployments(obj.Namespace).Get(obj.Name); err == nil { + return d.Status.Replicas < schedDesiredReplicas(d.Spec.Replicas) + } + } + case "StatefulSet": + if l := cache.StatefulSets(); l != nil { + if ss, err := l.StatefulSets(obj.Namespace).Get(obj.Name); err == nil { + return ss.Status.Replicas < schedDesiredReplicas(ss.Spec.Replicas) + } + } + case "DaemonSet": + if l := cache.DaemonSets(); l != nil { + if ds, err := l.DaemonSets(obj.Namespace).Get(obj.Name); err == nil { + return ds.Status.CurrentNumberScheduled < ds.Status.DesiredNumberScheduled + } + } + case "Job": + if l := cache.Jobs(); l != nil { + if j, err := l.Jobs(obj.Namespace).Get(obj.Name); err == nil { + // Only "blocked" if the Job has created NO pod yet — any of + // Active/Succeeded/Failed > 0 means a pod was created (so the + // rejection isn't admission-from-the-start), and a stale quota + // event shouldn't surface for it. (Trade-off: a Job that ran + // some pods, then gets quota-blocked mid-retry, is not flagged.) + return j.Status.Active == 0 && j.Status.Succeeded == 0 && j.Status.Failed == 0 + } + } + } + return true +} + +func schedDesiredReplicas(r *int32) int32 { + if r == nil { + return 1 + } + return *r +} + +// classifyAdmissionFailure maps a FailedCreate event message to a reason. +// Returns ok=false for FailedCreate messages that aren't admission denials +// (e.g. transient "object is being deleted") so we don't over-report. 
+func classifyAdmissionFailure(msg string) (string, bool) { + lower := strings.ToLower(msg) + switch { + case strings.Contains(lower, "exceeded quota"), strings.Contains(lower, "failed quota"): + return "QuotaExceeded", true + case strings.Contains(lower, "violates podsecurity"), strings.Contains(lower, "violates pod security"): + return "PodSecurityViolation", true + case strings.Contains(lower, "admission webhook") && strings.Contains(lower, "denied"): + return "WebhookDenied", true + case strings.Contains(lower, "forbidden") && (strings.Contains(lower, "limitrange") || + strings.Contains(lower, "maximum") || strings.Contains(lower, "minimum")): + return "LimitRangeViolation", true + default: + return "", false + } +} + +// ---- Post-bind detection ------------------------------------------------ +// +// The pod was scheduled (a node accepted it) but the kubelet can't bring it +// up — stuck in ContainerCreating because the CNI can't hand out an IP or the +// CSI can't attach/mount a volume. radar otherwise treats ContainerCreating +// as benign, so these silently sit as "Pending". The failure detail lives in +// kubelet events (FailedCreatePodSandBox / FailedMount / FailedAttachVolume), +// so this detector is event-driven, cross-checked against still-stuck pods so +// a pod that recovered after a retry isn't falsely flagged. + +const postBindFailureWindow = 10 * time.Minute + +var postBindSeverity = map[string]string{ + "IPExhaustion": "critical", + "SandboxCreationFailed": "high", + "VolumeMultiAttach": "critical", + "VolumeAttach": "high", + "VolumeMount": "high", +} + +// DetectPostBindProblems flags pods stuck in ContainerCreating due to CNI/IP +// or volume failures. namespace="" scans all namespaces. 
+func DetectPostBindProblems(cache *ResourceCache, namespace string) []Problem { + if cache == nil || cache.Events() == nil { + return nil + } + stuck := stuckScheduledPods(cache, namespace) + if len(stuck) == 0 { + return nil + } + + var events []*corev1.Event + if namespace != "" { + events, _ = cache.Events().Events(namespace).List(labels.Everything()) + } else { + events, _ = cache.Events().List(labels.Everything()) + } + + now := time.Now() + // One row per stuck pod, showing the CURRENT blocker. The kubelet + // re-emits a post-bind event per retry and the active cause can change + // (NetworkNotReady → FailedMount). Informer List order is arbitrary, so + // keep the LATEST event by LastTimestamp per pod rather than whichever was + // iterated first — mirrors detectAdmissionFailures. + type pbCandidate struct { + ev *corev1.Event + reason string + } + latest := map[string]pbCandidate{} + var order []string + for _, e := range events { + if e.InvolvedObject.Kind != "Pod" { + continue + } + reason, ok := classifyPostBindFailure(e.Reason, e.Message) + if !ok { + continue + } + if t := eventLastTime(e); !t.IsZero() && now.Sub(t) > postBindFailureWindow { + continue + } + key := e.InvolvedObject.Namespace + "/" + e.InvolvedObject.Name + if _, isStuck := stuck[key]; !isStuck { + continue + } + if cur, exists := latest[key]; exists { + if eventLastTime(e).After(eventLastTime(cur.ev)) { + latest[key] = pbCandidate{ev: e, reason: reason} + } + continue + } + latest[key] = pbCandidate{ev: e, reason: reason} + order = append(order, key) + } + + problems := make([]Problem, 0, len(order)) + for _, key := range order { + c := latest[key] + pod := stuck[key] + severity := postBindSeverity[c.reason] + if severity == "" { + severity = "high" + } + ageDur := now.Sub(pod.CreationTimestamp.Time) + problems = append(problems, Problem{ + Kind: "Pod", + Namespace: pod.Namespace, + Name: pod.Name, + Severity: severity, + Reason: c.reason, + Message: "stuck creating: " + 
strings.TrimSpace(c.ev.Message), + Age: FormatAge(ageDur), + AgeSeconds: int64(ageDur.Seconds()), + Duration: FormatAge(ageDur), + DurationSeconds: int64(ageDur.Seconds()), + }) + } + return problems +} + +// stuckScheduledPods returns Pending pods that the scheduler DID place +// (PodScheduled is not False) — i.e. owned by the post-bind layer, not the +// bind-time detector. Keyed "namespace/name". +func stuckScheduledPods(cache *ResourceCache, namespace string) map[string]*corev1.Pod { + out := map[string]*corev1.Pod{} + for _, pods := range listPodsByNamespace(cache, namespace) { + for _, pod := range pods { + if pod.Status.Phase != corev1.PodPending { + continue + } + if cond := podScheduledCondition(pod); cond != nil && cond.Status == corev1.ConditionFalse { + continue // unschedulable — the bind-time detector owns it + } + out[pod.Namespace+"/"+pod.Name] = pod + } + } + return out +} + +// classifyPostBindFailure maps a kubelet event (reason + message) to a +// post-bind failure class, distinguishing IP exhaustion from generic sandbox +// failures and multi-attach from generic volume-attach errors. 
+func classifyPostBindFailure(reason, msg string) (string, bool) { + lower := strings.ToLower(msg) + switch { + case reason == "FailedCreatePodSandBox" || strings.Contains(lower, "failed to create pod sandbox"): + if strings.Contains(lower, "assign an ip") || + strings.Contains(lower, "insufficientfreeaddresses") || + strings.Contains(lower, "no ip addresses available") || + strings.Contains(lower, "all ip addresses") { + return "IPExhaustion", true + } + return "SandboxCreationFailed", true + case reason == "FailedAttachVolume": + if strings.Contains(lower, "multi-attach") { + return "VolumeMultiAttach", true + } + return "VolumeAttach", true + case reason == "FailedMount": + return "VolumeMount", true + default: + return "", false + } +} diff --git a/internal/k8s/scheduling_integration_test.go b/internal/k8s/scheduling_integration_test.go new file mode 100644 index 000000000..56aee45f7 --- /dev/null +++ b/internal/k8s/scheduling_integration_test.go @@ -0,0 +1,241 @@ +package k8s + +import ( + "strings" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +func ptr32(i int32) *int32 { return &i } + +// Exercises the bind-time detector end-to-end: a Pending pod the scheduler +// rejected on arch, with the node-fit resolver naming the offending label. 
+func TestDetectSchedulingProblems_BindTime(t *testing.T) { + defer ResetTestState() + node := func(name string) *corev1.Node { + return &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: name, Labels: map[string]string{"kubernetes.io/arch": "amd64"}}} + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "prod"}, + Spec: corev1.PodSpec{NodeSelector: map[string]string{"kubernetes.io/arch": "arm64"}}, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{{ + Type: corev1.PodScheduled, + Status: corev1.ConditionFalse, + Reason: "Unschedulable", + Message: "0/2 nodes are available: 2 node(s) didn't match Pod's node affinity/selector.", + }}, + }, + } + if err := InitTestResourceCache(fake.NewClientset(node("n1"), node("n2"), pod)); err != nil { + t.Fatalf("InitTestResourceCache: %v", err) + } + problems := DetectSchedulingProblems(GetResourceCache(), "prod") + + if !findProblem(problems, "Pod", "prod", "web", "Unschedulable") { + t.Fatalf("expected Unschedulable Pod problem, got %+v", problems) + } + for _, p := range problems { + if p.Name == "web" { + for _, want := range []string{"kubernetes.io/arch", "arm64", "amd64"} { + if !strings.Contains(p.Message, want) { + t.Errorf("message %q should name the offending label %q", p.Message, want) + } + } + } + } +} + +// Exercises the admission FailedCreate path: dedup to one row per object, the +// recovered-workload cross-check (created-but-not-ready is skipped), and that +// the LATEST event wins when the active blocker changed (quota → webhook). +func TestDetectAdmissionProblems_FailedCreateCrossCheck(t *testing.T) { + defer ResetTestState() + // replicas = pods actually CREATED. "blocked" = couldn't create (replicas<2); + // created-but-not-ready (replicas==2, ready==0, e.g. now unschedulable) is + // NOT admission-blocked and must be skipped. 
+ rs := func(name string, replicas int32) *appsv1.ReplicaSet { + return &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "prod"}, + Spec: appsv1.ReplicaSetSpec{Replicas: ptr32(2)}, + Status: appsv1.ReplicaSetStatus{Replicas: replicas, ReadyReplicas: 0}, + } + } + evt := func(name, rsName, msg string, last metav1.Time) *corev1.Event { + return &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "prod"}, + InvolvedObject: corev1.ObjectReference{Kind: "ReplicaSet", Namespace: "prod", Name: rsName}, + Reason: "FailedCreate", + Type: corev1.EventTypeWarning, + Message: msg, + LastTimestamp: last, + } + } + quotaMsg := `Error creating: pods "x" is forbidden: exceeded quota: mem-quota, used: requests.memory=2Gi, limited: requests.memory=2Gi` + webhookMsg := `Error creating: admission webhook "vpod.example.com" denied the request: blocked` + nowT := metav1.Now() + oldT := metav1.NewTime(nowT.Add(-10 * time.Minute)) + + // rs-blocked has two events: an OLDER quota rejection and a NEWER webhook + // rejection (the active blocker changed). Expect exactly one row, carrying + // the LATEST reason (webhook) — not whichever the informer iterates first. 
+ if err := InitTestResourceCache(fake.NewClientset( + rs("rs-blocked", 0), rs("rs-ok", 2), + evt("e1", "rs-blocked", quotaMsg, oldT), evt("e1b", "rs-blocked", webhookMsg, nowT), + evt("e2", "rs-ok", quotaMsg, nowT), + )); err != nil { + t.Fatalf("InitTestResourceCache: %v", err) + } + problems := DetectAdmissionProblems(GetResourceCache(), "prod") + + if !findProblem(problems, "ReplicaSet", "prod", "rs-blocked", "WebhookDenied") { + t.Errorf("rs-blocked should surface the LATEST blocker (WebhookDenied), got %+v", problems) + } + blockedRows := 0 + for _, p := range problems { + if p.Name == "rs-blocked" { + blockedRows++ + if p.Reason == "QuotaExceeded" { + t.Errorf("stale (older) quota event must not win over the newer webhook one: %+v", p) + } + } + if p.Name == "rs-ok" { + t.Errorf("ReplicaSet with pods created (replicas met) but not ready — e.g. now unschedulable — is not admission-blocked and must be skipped: %+v", p) + } + } + if blockedRows != 1 { + t.Errorf("expected exactly 1 row for rs-blocked (deduped by object), got %d: %+v", blockedRows, problems) + } +} + +// A SchedulingGated pod has PodScheduled=False but reason=SchedulingGated — +// the scheduler hasn't tried yet because the pod carries scheduling gates. +// That's an intentional not-yet-scheduled state, not a placement failure, so +// it must NOT surface as Unschedulable (matching the frontend's reason gate). 
+func TestDetectSchedulingProblems_SchedulingGatedIsNotUnschedulable(t *testing.T) { + defer ResetTestState() + gated := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "gated", Namespace: "prod"}, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{{ + Type: corev1.PodScheduled, + Status: corev1.ConditionFalse, + Reason: corev1.PodReasonSchedulingGated, + Message: "Scheduling is blocked due to non-empty scheduling gates", + }}, + }, + } + if err := InitTestResourceCache(fake.NewClientset(gated)); err != nil { + t.Fatalf("InitTestResourceCache: %v", err) + } + if IsPodUnschedulable(gated) { + t.Errorf("SchedulingGated pod must not be reported unschedulable") + } + for _, p := range DetectSchedulingProblems(GetResourceCache(), "prod") { + if p.Name == "gated" { + t.Errorf("SchedulingGated pod must not surface a scheduling problem: %+v", p) + } + } +} + +// Exercises the post-bind detector's latest-event-wins dedup: a pod stuck +// scheduled (Pending, PodScheduled!=False) with two kubelet events — an older +// NetworkNotReady and a newer FailedMount — yields one row carrying the LATEST +// blocker, not whichever the informer iterated first. 
+func TestDetectPostBindProblems_LatestEventWins(t *testing.T) { + defer ResetTestState() + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "prod", CreationTimestamp: metav1.NewTime(time.Now().Add(-8 * time.Minute))}, + Status: corev1.PodStatus{Phase: corev1.PodPending}, // scheduled (no PodScheduled=False condition) + } + nowT := metav1.Now() + oldT := metav1.NewTime(nowT.Add(-5 * time.Minute)) + ev := func(name, reason, msg string, last metav1.Time) *corev1.Event { + return &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "prod"}, + InvolvedObject: corev1.ObjectReference{Kind: "Pod", Namespace: "prod", Name: "web"}, + Reason: reason, + Type: corev1.EventTypeWarning, + Message: msg, + LastTimestamp: last, + } + } + if err := InitTestResourceCache(fake.NewClientset( + pod, + ev("e1", "FailedCreatePodSandBox", "failed to create pod sandbox: network is not ready", oldT), + ev("e2", "FailedMount", "Unable to attach or mount volumes: timed out waiting for the condition", nowT), + )); err != nil { + t.Fatalf("InitTestResourceCache: %v", err) + } + problems := DetectPostBindProblems(GetResourceCache(), "prod") + + if !findProblem(problems, "Pod", "prod", "web", "VolumeMount") { + t.Fatalf("expected the LATEST blocker (VolumeMount) to win, got %+v", problems) + } + rows := 0 + for _, p := range problems { + if p.Name == "web" { + rows++ + if p.Reason == "SandboxCreationFailed" { + t.Errorf("stale (older) sandbox event must not win over the newer mount one: %+v", p) + } + } + } + if rows != 1 { + t.Errorf("expected exactly 1 post-bind row for web (deduped by pod), got %d: %+v", rows, problems) + } +} + +// Exercises the cross-check for Job + DaemonSet, whose created-count signals +// differ from the replica kinds: a Job that created no pod and a partially +// scheduled DaemonSet are still blocked; a terminally-failed Job (Failed>0) and +// a fully-scheduled DaemonSet are not, so stale quota events must not surface. 
+func TestDetectAdmissionProblems_JobAndDaemonSetCrossCheck(t *testing.T) { + defer ResetTestState() + evt := func(name, kind, objName string) *corev1.Event { + return &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "prod"}, + InvolvedObject: corev1.ObjectReference{Kind: kind, Namespace: "prod", Name: objName}, + Reason: "FailedCreate", + Type: corev1.EventTypeWarning, + Message: `Error creating: pods "x" is forbidden: exceeded quota: q, used: pods=1, limited: pods=1`, + LastTimestamp: metav1.Now(), + } + } + jobBlocked := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: "job-blocked", Namespace: "prod"}} // all counters 0 → created nothing → blocked + jobFailed := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: "job-failed", Namespace: "prod"}, Status: batchv1.JobStatus{Failed: 3}} + dsBlocked := &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "ds-blocked", Namespace: "prod"}, Status: appsv1.DaemonSetStatus{CurrentNumberScheduled: 1, DesiredNumberScheduled: 3}} + dsOk := &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "ds-ok", Namespace: "prod"}, Status: appsv1.DaemonSetStatus{CurrentNumberScheduled: 3, DesiredNumberScheduled: 3}} + + if err := InitTestResourceCache(fake.NewClientset( + jobBlocked, jobFailed, dsBlocked, dsOk, + evt("je1", "Job", "job-blocked"), evt("je2", "Job", "job-failed"), + evt("de1", "DaemonSet", "ds-blocked"), evt("de2", "DaemonSet", "ds-ok"), + )); err != nil { + t.Fatalf("InitTestResourceCache: %v", err) + } + problems := DetectAdmissionProblems(GetResourceCache(), "prod") + + if !findProblem(problems, "Job", "prod", "job-blocked", "QuotaExceeded") { + t.Errorf("Job that created no pod should surface QuotaExceeded, got %+v", problems) + } + if !findProblem(problems, "DaemonSet", "prod", "ds-blocked", "QuotaExceeded") { + t.Errorf("partially-scheduled DaemonSet should surface QuotaExceeded, got %+v", problems) + } + for _, p := range problems { + if p.Name == "job-failed" { + t.Errorf("terminally-failed 
Job (Failed>0) created a pod, so it's not admission-blocked and must be skipped: %+v", p) + } + if p.Name == "ds-ok" { + t.Errorf("fully-scheduled DaemonSet must be skipped: %+v", p) + } + } +} diff --git a/internal/k8s/scheduling_test.go b/internal/k8s/scheduling_test.go new file mode 100644 index 000000000..eb08eed5b --- /dev/null +++ b/internal/k8s/scheduling_test.go @@ -0,0 +1,329 @@ +package k8s + +import ( + "strings" + "testing" + + corev1 "k8s.io/api/core/v1" +) + +func TestParseSchedulerMessage_TotalNodesAndPreemptionTail(t *testing.T) { + msg := "0/5 nodes are available: 2 Insufficient cpu, 3 node(s) had untolerated taint {dedicated: gpu}. " + + "preemption: 0/5 nodes are available: 5 No preemption victims found for incoming pod." + total, reasons := parseSchedulerMessage(msg) + if total != 5 { + t.Fatalf("totalNodes = %d, want 5", total) + } + if len(reasons) != 2 { + t.Fatalf("got %d reasons, want 2 (preemption tail must be dropped): %+v", len(reasons), reasons) + } + if reasons[0].Class != SchedInsufficientResource || reasons[0].Resource != "cpu" || reasons[0].NodeCount != 2 { + t.Errorf("reason[0] = %+v, want InsufficientResource cpu count=2", reasons[0]) + } + if reasons[1].Class != SchedUntoleratedTaint || reasons[1].TaintKey != "dedicated" || reasons[1].TaintValue != "gpu" || reasons[1].NodeCount != 3 { + t.Errorf("reason[1] = %+v, want UntoleratedTaint dedicated=gpu count=3", reasons[1]) + } +} + +func TestParseSchedulerMessage_Classes(t *testing.T) { + cases := []struct { + name string + clause string // becomes "0/3 nodes are available: ." 
+ class SchedReasonClass + resource string + taintK string + taintV string + }{ + {"insufficient cpu", "3 Insufficient cpu", SchedInsufficientResource, "cpu", "", ""}, + {"insufficient memory", "3 Insufficient memory", SchedInsufficientResource, "memory", "", ""}, + {"insufficient gpu", "3 Insufficient nvidia.com/gpu", SchedInsufficientResource, "nvidia.com/gpu", "", ""}, + {"too many pods", "3 Too many pods", SchedInsufficientResource, "pods", "", ""}, + {"node affinity/selector", "3 node(s) didn't match Pod's node affinity/selector", SchedNodeAffinitySelector, "", "", ""}, + {"node affinity only", "3 node(s) didn't match Pod's node affinity", SchedNodeAffinitySelector, "", "", ""}, + {"node selector older", "3 node(s) didn't match node selector", SchedNodeAffinitySelector, "", "", ""}, + {"pod affinity", "3 node(s) didn't match pod affinity rules", SchedPodAffinity, "", "", ""}, + {"pod anti-affinity", "3 node(s) didn't match pod anti-affinity rules", SchedPodAntiAffinity, "", "", ""}, + {"existing anti-affinity", "3 node(s) didn't satisfy existing pods anti-affinity rules", SchedPodAntiAffinity, "", "", ""}, + {"topology spread", "3 node(s) didn't match pod topology spread constraints", SchedTopologySpread, "", "", ""}, + {"volume node affinity", "3 node(s) had volume node affinity conflict", SchedVolumeNodeAffinity, "", "", ""}, + {"volume count", "3 node(s) exceed max volume count", SchedVolumeCount, "", "", ""}, + {"no free ports", "3 node(s) didn't have free ports for the requested pod ports", SchedNoPorts, "", "", ""}, + {"cordoned", "3 node(s) were unschedulable", SchedNodeUnschedulable, "", "", ""}, + {"taint with value", "3 node(s) had untolerated taint {dedicated: gpu}", SchedUntoleratedTaint, "", "dedicated", "gpu"}, + {"taint no value", "3 node(s) had untolerated taint {workload}", SchedUntoleratedTaint, "", "workload", ""}, + {"lifecycle taint reclassified", "3 node(s) had untolerated taint {node.kubernetes.io/not-ready: }", SchedNodeUnschedulable, 
"", "node.kubernetes.io/not-ready", ""}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + _, reasons := parseSchedulerMessage("0/3 nodes are available: " + c.clause + ".") + if len(reasons) != 1 { + t.Fatalf("got %d reasons, want 1: %+v", len(reasons), reasons) + } + r := reasons[0] + if r.Class != c.class { + t.Errorf("class = %q, want %q (raw=%q)", r.Class, c.class, r.Raw) + } + if c.resource != "" && r.Resource != c.resource { + t.Errorf("resource = %q, want %q", r.Resource, c.resource) + } + if c.taintK != "" && (r.TaintKey != c.taintK || r.TaintValue != c.taintV) { + t.Errorf("taint = %q=%q, want %q=%q", r.TaintKey, r.TaintValue, c.taintK, c.taintV) + } + if r.NodeCount != 3 { + t.Errorf("nodeCount = %d, want 3", r.NodeCount) + } + }) + } +} + +func TestParseSchedulerMessage_MultiClauseArchAndTaint(t *testing.T) { + // The arch-mismatch shape: scheduler reports a selector miss; the + // node-fit resolver later names kubernetes.io/arch specifically. + msg := "0/6 nodes are available: 4 node(s) didn't match Pod's node affinity/selector, " + + "2 node(s) had untolerated taint {dedicated: gpu}." 
+ total, reasons := parseSchedulerMessage(msg) + if total != 6 || len(reasons) != 2 { + t.Fatalf("total=%d reasons=%d, want 6/2: %+v", total, len(reasons), reasons) + } + if reasons[0].Class != SchedNodeAffinitySelector || reasons[0].NodeCount != 4 { + t.Errorf("reason[0] = %+v, want NodeAffinitySelector count=4", reasons[0]) + } + if reasons[1].Class != SchedUntoleratedTaint || reasons[1].NodeCount != 2 { + t.Errorf("reason[1] = %+v, want UntoleratedTaint count=2", reasons[1]) + } +} + +func TestParseSchedulerMessage_WholeMessageVariants(t *testing.T) { + total, reasons := parseSchedulerMessage("pod has unbound immediate PersistentVolumeClaims") + if total != 0 { + t.Errorf("totalNodes = %d, want 0 (no node prefix)", total) + } + if len(reasons) != 1 || reasons[0].Class != SchedVolumeBinding { + t.Fatalf("want single VolumeBinding reason, got %+v", reasons) + } +} + +func TestParseSchedulerMessage_Empty(t *testing.T) { + if total, reasons := parseSchedulerMessage(""); total != 0 || reasons != nil { + t.Errorf("empty message should yield 0/nil, got %d/%+v", total, reasons) + } + if _, reasons := parseSchedulerMessage(" "); reasons != nil { + t.Errorf("whitespace message should yield nil reasons, got %+v", reasons) + } +} + +func TestResolveUnsatisfiableNodeSelector_ArchMismatch(t *testing.T) { + nodes := []NodeFacts{ + {Name: "n1", Labels: map[string]string{"kubernetes.io/arch": "amd64", "kubernetes.io/os": "linux"}}, + {Name: "n2", Labels: map[string]string{"kubernetes.io/arch": "amd64", "kubernetes.io/os": "linux"}}, + } + p := PodPlacement{NodeSelector: map[string]string{"kubernetes.io/arch": "arm64"}} + got := resolveUnsatisfiableNodeSelector(p, nodes) + if len(got) != 1 { + t.Fatalf("got %d explanations, want 1: %+v", len(got), got) + } + // Must name the offending key, the required value, and the fleet's actual value. 
+ for _, want := range []string{"kubernetes.io/arch", "arm64", "amd64"} { + if !strings.Contains(got[0], want) { + t.Errorf("explanation %q missing %q", got[0], want) + } + } +} + +func TestResolveUnsatisfiableNodeSelector_ZoneViaAffinity(t *testing.T) { + nodes := []NodeFacts{ + {Name: "n1", Labels: map[string]string{"topology.kubernetes.io/zone": "us-east-1a"}}, + {Name: "n2", Labels: map[string]string{"topology.kubernetes.io/zone": "us-east-1a"}}, + } + p := PodPlacement{RequiredNodeAffinity: []NodeSelectorTermFacts{ + {Expressions: []MatchExpr{{Key: "topology.kubernetes.io/zone", Operator: "In", Values: []string{"us-east-1b"}}}}, + }} + got := resolveUnsatisfiableNodeSelector(p, nodes) + if len(got) != 1 || !strings.Contains(got[0], "topology.kubernetes.io/zone") || !strings.Contains(got[0], "us-east-1a") { + t.Fatalf("want zone explanation naming the key + fleet value, got %+v", got) + } +} + +func TestResolveUnsatisfiableNodeSelector_MissingLabelEntirely(t *testing.T) { + nodes := []NodeFacts{{Name: "n1", Labels: map[string]string{"kubernetes.io/arch": "amd64"}}} + p := PodPlacement{NodeSelector: map[string]string{"disktype": "ssd"}} + got := resolveUnsatisfiableNodeSelector(p, nodes) + if len(got) != 1 || !strings.Contains(got[0], "no node carries label disktype") { + t.Fatalf("want 'no node carries label disktype', got %+v", got) + } +} + +func TestResolveUnsatisfiableNodeSelector_Satisfiable(t *testing.T) { + nodes := []NodeFacts{ + {Name: "n1", Labels: map[string]string{"kubernetes.io/arch": "amd64"}}, + {Name: "n2", Labels: map[string]string{"kubernetes.io/arch": "arm64"}}, + } + // arm64 IS present on n2 → no explanation. 
+ p := PodPlacement{NodeSelector: map[string]string{"kubernetes.io/arch": "arm64"}} + if got := resolveUnsatisfiableNodeSelector(p, nodes); len(got) != 0 { + t.Fatalf("satisfiable selector should yield no explanations, got %+v", got) + } +} + +func TestResolveUnsatisfiableNodeSelector_AnyTermSatisfiable(t *testing.T) { + nodes := []NodeFacts{{Name: "n1", Labels: map[string]string{"pool": "general"}}} + // Two terms ORed: one matches "general" → affinity is satisfiable, no report. + p := PodPlacement{RequiredNodeAffinity: []NodeSelectorTermFacts{ + {Expressions: []MatchExpr{{Key: "pool", Operator: "In", Values: []string{"gpu"}}}}, + {Expressions: []MatchExpr{{Key: "pool", Operator: "In", Values: []string{"general"}}}}, + }} + if got := resolveUnsatisfiableNodeSelector(p, nodes); len(got) != 0 { + t.Fatalf("one satisfiable term should yield no explanations, got %+v", got) + } +} + +func TestNodeMatchesExpr_Operators(t *testing.T) { + n := NodeFacts{Labels: map[string]string{"arch": "arm64", "rank": "5"}} + cases := []struct { + e MatchExpr + want bool + }{ + {MatchExpr{"arch", "In", []string{"arm64", "amd64"}}, true}, + {MatchExpr{"arch", "In", []string{"amd64"}}, false}, + {MatchExpr{"arch", "NotIn", []string{"amd64"}}, true}, + {MatchExpr{"arch", "Exists", nil}, true}, + {MatchExpr{"gpu", "Exists", nil}, false}, + {MatchExpr{"gpu", "DoesNotExist", nil}, true}, + {MatchExpr{"rank", "Gt", []string{"3"}}, true}, + {MatchExpr{"rank", "Lt", []string{"3"}}, false}, + } + for _, c := range cases { + if got := nodeMatchesExpr(n, c.e); got != c.want { + t.Errorf("nodeMatchesExpr(%+v) = %v, want %v", c.e, got, c.want) + } + } +} + +func TestDescribeUnschedulable_ArchMismatchNamesLabel(t *testing.T) { + pod := &corev1.Pod{Spec: corev1.PodSpec{ + NodeSelector: map[string]string{"kubernetes.io/arch": "arm64"}, + }} + nodes := []NodeFacts{ + {Name: "n1", Labels: map[string]string{"kubernetes.io/arch": "amd64"}}, + {Name: "n2", Labels: map[string]string{"kubernetes.io/arch": 
"amd64"}}, + } + msg := describeUnschedulable(pod, "0/2 nodes are available: 2 node(s) didn't match Pod's node affinity/selector.", nodes) + for _, want := range []string{"kubernetes.io/arch", "arm64", "amd64", "0/2 nodes available"} { + if !strings.Contains(msg, want) { + t.Errorf("message %q missing %q", msg, want) + } + } + // The resolved label supersedes the generic clause — don't double-report. + if strings.Contains(msg, "node affinity/selector mismatch") { + t.Errorf("generic affinity clause should be omitted once resolved: %q", msg) + } +} + +func TestDescribeUnschedulable_ResourcesAndTaint(t *testing.T) { + pod := &corev1.Pod{} + msg := describeUnschedulable(pod, + "0/5 nodes are available: 3 Insufficient cpu, 2 node(s) had untolerated taint {dedicated: gpu}.", nil) + for _, want := range []string{"insufficient cpu", "untolerated taint dedicated=gpu", "0/5 nodes available"} { + if !strings.Contains(msg, want) { + t.Errorf("message %q missing %q", msg, want) + } + } +} + +func TestDescribeUnschedulable_FallbackWhenUnparseable(t *testing.T) { + if got := describeUnschedulable(&corev1.Pod{}, "", nil); got != "Pod is unschedulable" { + t.Errorf("empty verdict should fall back, got %q", got) + } + raw := "some future scheduler phrasing we don't model yet" + if got := describeUnschedulable(&corev1.Pod{}, raw, nil); got != raw { + t.Errorf("unmodeled verdict should pass through raw, got %q", got) + } +} + +func TestExtractPodPlacement(t *testing.T) { + pod := &corev1.Pod{Spec: corev1.PodSpec{ + NodeSelector: map[string]string{"disktype": "ssd"}, + Affinity: &corev1.Affinity{NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{{ + MatchExpressions: []corev1.NodeSelectorRequirement{{ + Key: "topology.kubernetes.io/zone", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"us-east-1a"}, + }}, + }}, + }, + }}, + }} + p := extractPodPlacement(pod) + if 
p.NodeSelector["disktype"] != "ssd" { + t.Errorf("nodeSelector not carried: %+v", p.NodeSelector) + } + if len(p.RequiredNodeAffinity) != 1 || len(p.RequiredNodeAffinity[0].Expressions) != 1 { + t.Fatalf("required affinity not extracted: %+v", p.RequiredNodeAffinity) + } + e := p.RequiredNodeAffinity[0].Expressions[0] + if e.Key != "topology.kubernetes.io/zone" || e.Operator != "In" || len(e.Values) != 1 { + t.Errorf("expr mismatch: %+v", e) + } +} + +func TestClassifyAdmissionFailure(t *testing.T) { + cases := []struct { + msg string + reason string + ok bool + }{ + {`Error creating: pods "x" is forbidden: exceeded quota: mem-quota, requested: limits.memory=1Gi, used: limits.memory=2Gi, limited: limits.memory=2Gi`, "QuotaExceeded", true}, + {`Error creating: pods "fix-auth" is forbidden: failed quota: memory-limit-quota: must specify limits.memory`, "QuotaExceeded", true}, + {`Error creating: pods "x" is forbidden: violates PodSecurity "restricted:latest"`, "PodSecurityViolation", true}, + {`Error creating: admission webhook "vpod.example.com" denied the request: nope`, "WebhookDenied", true}, + {`Error creating: pods "x" is forbidden: maximum cpu usage per Container is 1, but limit is 2`, "LimitRangeViolation", true}, + {`Error creating: object is being deleted: pods "x" already exists`, "", false}, + {`some unrelated message`, "", false}, + } + for _, c := range cases { + reason, ok := classifyAdmissionFailure(c.msg) + if ok != c.ok || reason != c.reason { + t.Errorf("classifyAdmissionFailure(%.50q) = %q,%v want %q,%v", c.msg, reason, ok, c.reason, c.ok) + } + } +} + +func TestClassifyPostBindFailure(t *testing.T) { + cases := []struct { + reason string + msg string + want string + ok bool + }{ + {"FailedCreatePodSandBox", `failed to create pod sandbox: ... 
failed to assign an IP address to container; InsufficientFreeAddresses`, "IPExhaustion", true}, + {"FailedCreatePodSandBox", `failed to create pod sandbox: rpc error: code = Unknown`, "SandboxCreationFailed", true}, + {"FailedAttachVolume", `Multi-Attach error for volume "pvc-123" Volume is already used by pod(s) other-pod`, "VolumeMultiAttach", true}, + {"FailedAttachVolume", `AttachVolume.Attach failed for volume "pvc-123": timed out`, "VolumeAttach", true}, + {"FailedMount", `Unable to attach or mount volumes: timed out waiting for the condition`, "VolumeMount", true}, + {"BackOff", `Back-off restarting failed container`, "", false}, + {"Scheduled", `Successfully assigned default/x to node-1`, "", false}, + } + for _, c := range cases { + got, ok := classifyPostBindFailure(c.reason, c.msg) + if ok != c.ok || got != c.want { + t.Errorf("classifyPostBindFailure(%q, %.40q) = %q,%v want %q,%v", c.reason, c.msg, got, ok, c.want, c.ok) + } + } +} + +func TestParseTaintPayload(t *testing.T) { + cases := map[string]struct{ k, v string }{ + "3 node(s) had untolerated taint {dedicated: gpu}": {"dedicated", "gpu"}, + "3 node(s) had untolerated taint {workload}": {"workload", ""}, + "3 node(s) had untolerated taint {node.kubernetes.io/unreachable: }": {"node.kubernetes.io/unreachable", ""}, + "3 node(s) had untolerated taint": {"", ""}, + } + for clause, want := range cases { + k, v := parseTaintPayload(clause) + if k != want.k || v != want.v { + t.Errorf("parseTaintPayload(%q) = %q,%q want %q,%q", clause, k, v, want.k, want.v) + } + } +} diff --git a/internal/k8s/testing.go b/internal/k8s/testing.go index 60e301723..c22293897 100644 --- a/internal/k8s/testing.go +++ b/internal/k8s/testing.go @@ -30,6 +30,7 @@ func InitTestResourceCache(client kubernetes.Interface) error { "secrets": true, "events": true, "persistentvolumeclaims": true, + "resourcequotas": true, "nodes": true, "namespaces": true, "jobs": true, diff --git a/internal/mcp/tools.go b/internal/mcp/tools.go index 
8a9753856..13c2537eb 100644 --- a/internal/mcp/tools.go +++ b/internal/mcp/tools.go @@ -160,7 +160,10 @@ func registerTools(server *mcp.Server) { "StatefulSet/DaemonSet: the resource (Kubernetes-shaped detail) + diagnostic " + "resourceContext (managedBy, exposes, selectedBy, uses, runsOn, " + "issue/audit/policy rollups) + current AND previous container logs across the " + - "workload's pods + recent Warning events filtered to this resource. Use for " + + "workload's pods + recent Warning events filtered to this resource + a " + + "startupBlockers section when the workload can't reach Running (unschedulable " + + "with the offending node constraint named, admission/quota rejection, or a " + + "post-bind CNI/volume stall). Use for " + "CrashLoopBackOff, OOMKills, failed deploys, image-pull errors, readiness " + "flaps, scheduling failures, error-spewing services, or any workload " + "root-causing where you would otherwise call get_resource → events → " + @@ -270,21 +273,21 @@ func registerTools(server *mcp.Server) { Description: "Use when the agent's decision is 'what's broken right now?' — LIVE " + "OPERATIONAL STATE, not config posture. Returns a ranked list of currently " + "failing resources: failing Deployments/StatefulSets/CronJobs/HPAs/Nodes/Jobs/" + - "PVCs (problem source), dangling-reference errors like Pod→missing PVC/CM/" + - "Secret/SA, HPA→missing scaleTargetRef, Ingress→missing backend Service, " + - "RoleBinding→missing Role, webhook→missing Service (missing_ref source), " + - "False .status.conditions on CRDs from Argo/Flux/Knative/Crossplane/" + - "cert-manager/KEDA (condition source), recent K8s Warning events (event " + - "source, opt-in), Kyverno PolicyReport policy violations (kyverno source, " + - "opt-in). Severity normalized to critical/warning. Defaults: problem + " + - "missing_ref + condition. event and kyverno are opt-in because they run " + - "50–1000+ rows per cluster. 
For STATIC best-practice / security-posture / " + - "compliance findings (runAsRoot, missing PDB, no probes, missing resource " + - "limits, etc.), use get_cluster_audit — that's a separate axis and the two " + - "should never be conflated (a healthy pod can have many audit findings; a " + - "crashing pod can have zero). The `source` param is a FILTER: source=kyverno " + - "returns ONLY Kyverno rows. To ADD an opt-in source to the defaults, list " + - "everything explicitly — e.g. source=problem,missing_ref,condition,kyverno. " + + "PVCs, dangling-reference errors like Pod→missing PVC/CM/Secret/SA, HPA→missing " + + "scaleTargetRef, Ingress→missing backend Service, RoleBinding→missing Role, " + + "webhook→missing Service, pod startup blockers — why a Pod can't reach Running: " + + "unschedulable (arch/taint/resources/affinity), admission-rejected " + + "(quota/PodSecurity/webhook), or stuck post-bind (CNI/volume), and False " + + ".status.conditions on CRDs from Argo/Flux/Knative/Crossplane/cert-manager/KEDA. " + + "Severity normalized to critical/warning. This is one curated stream — there is " + + "no source filter; each row carries a `source` label (problem|missing_ref|" + + "scheduling|condition) you can slice on via the CEL filter= if needed. " + + "For raw Kubernetes Warning events use get_events; for static best-practice / " + + "security-posture findings (runAsRoot, missing PDB, no probes, missing resource " + + "limits) use get_cluster_audit — a separate axis that must never be conflated (a " + + "healthy pod can have many audit findings; a crashing pod can have zero). Kyverno " + + "PolicyReport violations are not in either — they surface per-resource via " + + "get_resource's resourceContext policy rollup. " + "After identifying a suspect issue, call diagnose when the affected resource " + "is a workload (Pod/Deployment/StatefulSet/DaemonSet) — it bundles spec + " + "logs + events + context in one call. 
For non-workload kinds, call " + @@ -427,7 +430,7 @@ type getResourceInput struct { Group string `json:"group,omitempty" jsonschema:"API group when the kind is ambiguous (e.g. cluster.x-k8s.io for CAPI Cluster vs CNPG Cluster)"` Namespace string `json:"namespace,omitempty" jsonschema:"namespace for namespaced kinds. Leave empty for cluster-scoped kinds (Node, ClusterRole, ClusterRoleBinding, IngressClass, PriorityClass, StorageClass, etc.)."` Name string `json:"name" jsonschema:"resource name"` - Include string `json:"include,omitempty" jsonschema:"optional sidecar data after narrowing to this object: events, metrics, logs. Separate from context; include may fetch heavier live/derived data."` + Include string `json:"include,omitempty" jsonschema:"optional sidecar data after narrowing to this object: events, metrics. Separate from context. For logs use get_pod_logs / get_workload_logs (container, previous, since, grep) or diagnose for the full workload bundle."` Context string `json:"context,omitempty" jsonschema:"resourceContext tier: 'basic' (default; attaches managedBy / exposes / selectedBy / uses / runsOn / issueSummary / auditSummary rollups) or 'none' (bare minified resource). For full diagnostic tier with logs + events bundled, use the diagnose tool instead."` } @@ -473,11 +476,9 @@ type searchInput struct { type issuesInput struct { Namespace string `json:"namespace,omitempty" jsonschema:"filter to one namespace"` Severity string `json:"severity,omitempty" jsonschema:"comma-separated: critical,warning"` - Source string `json:"source,omitempty" jsonschema:"comma-separated list of LIVE operational sources to RETURN: problem,missing_ref,event,condition,kyverno. Acts as a FILTER, not an additive opt-in — when set, only the listed sources appear in the response. 
Default (omitted): problem+missing_ref+condition (event + kyverno excluded because each is loud: events flood thousands per cluster and mostly duplicate problem-source rows; Kyverno PolicyReports typically 10+ rows per workload under a baseline PSS profile). missing_ref surfaces dangling-reference errors (Pod→missing PVC/CM/Secret/SA, HPA→missing target, Ingress→missing backend, RoleBinding→missing roleRef, webhook→missing Service). Examples: source='kyverno' returns ONLY Kyverno rows (no problems, no missing_refs, no conditions); source='problem,missing_ref,condition,kyverno' returns the defaults plus Kyverno. Static best-practice/security-posture audit findings are intentionally not a source here; use get_cluster_audit."` Kind string `json:"kind,omitempty" jsonschema:"comma-separated kind filter (e.g. Deployment,Pod)"` - Since string `json:"since,omitempty" jsonschema:"event lookback window, e.g. 15m or 1h. Only affects the event source; when events are enabled and since is omitted, defaults to 1h to avoid pulling the full event-cache backlog."` Limit int `json:"limit,omitempty" jsonschema:"max issues returned (default 200, max 1000)"` - Filter string `json:"filter,omitempty" jsonschema:"optional CEL boolean expression run against each composed Issue. Bindings: severity, source, kind, group, ns (the namespace — note: use 'ns' not 'namespace' because the latter is a CEL reserved word), name, reason, message, count (int), cluster, last_seen (unix seconds). Examples: 'severity == \"critical\" && count > 5', 'source == \"condition\" && ns.startsWith(\"prod-\")'"` + Filter string `json:"filter,omitempty" jsonschema:"optional CEL boolean expression run against each composed Issue. Bindings: severity, source (the detector that found it: problem|missing_ref|scheduling|condition), kind, group, ns (the namespace — note: use 'ns' not 'namespace' because the latter is a CEL reserved word), name, reason, message, count (int), cluster, last_seen (unix seconds). 
Examples: 'severity == \"critical\" && count > 5', 'source == \"condition\" && ns.startsWith(\"prod-\")'"` } // Tool handlers @@ -979,31 +980,31 @@ func attachResourceExtras(ctx context.Context, cache *k8s.ResourceCache, result } } + // include=logs was dropped from get_resource (it was Pod-only and lacked + // container/previous/since/grep). Signal it explicitly rather than silently + // no-op'ing, so a client on a stale schema is redirected instead of seeing + // an empty success. if includes["logs"] { - if isPodKind(kind) { - client := k8s.ClientFromContext(ctx) - if client == nil { - result["logsError"] = "no kube client in request context" - } else { - tailLines := int64(100) - opts := &corev1.PodLogOptions{TailLines: &tailLines} - stream, err := client.CoreV1().Pods(namespace).GetLogs(name, opts).Stream(ctx) - if err != nil { - log.Printf("[mcp] Failed to get logs for %s/%s: %v", namespace, name, err) - result["logsError"] = fmt.Sprintf("failed to open log stream: %v", err) - } else { - defer stream.Close() - data, readErr := io.ReadAll(stream) - if readErr != nil { - log.Printf("[mcp] Failed to read logs for %s/%s: %v", namespace, name, readErr) - result["logsError"] = fmt.Sprintf("failed to read log stream: %v", readErr) - } else { - result["logs"] = aicontext.FilterLogs(string(data)) - } - } - } + result["logsError"] = "include=logs is no longer supported here; use get_pod_logs or get_workload_logs (container, previous, since, grep) or diagnose for the full workload bundle" + } + + // Any other token (typo, or a value like "relationships" that moved to + // resourceContext) is silently dropped by the branches above. Surface it + // so the caller learns the token did nothing rather than seeing an empty + // success — the same reason logs gets an explicit error. 
+ var unknown []string + for tok := range includes { + switch tok { + case "events", "metrics", "logs": + default: + unknown = append(unknown, tok) } } + if len(unknown) > 0 { + sort.Strings(unknown) + result["includeError"] = fmt.Sprintf("unknown include value(s): %s (valid: events, metrics)", strings.Join(unknown, ", ")) + } + } // normalizeDisplayKind converts a lowercase kind to its display form for matching @@ -1869,6 +1870,29 @@ func buildDashboard(ctx context.Context, cache *k8s.ResourceCache, namespace str }) } + // Scheduling problems: unschedulable pods (with the offending node + // constraint named), admission rejections (quota/PodSecurity/webhook — + // no Pod exists, so the pod loop above can't see them), and post-bind + // CNI/volume stalls. The pod loop only emits "error" pods, so these are + // additive; the seenProblem dedup below keys on reason, letting a pod's + // scheduling row coexist with a distinct missing-ref row. + sched := k8s.DetectSchedulingProblems(cache, namespace) + sched = append(sched, k8s.DetectAdmissionProblems(cache, namespace)...) + sched = append(sched, k8s.DetectPostBindProblems(cache, namespace)...) + for _, p := range sched { + allProblems = append(allProblems, mcpProblem{ + Kind: p.Kind, + Namespace: p.Namespace, + Name: p.Name, + Group: p.Group, + Severity: p.Severity, + Reason: p.Reason, + Message: p.Message, + Age: p.Age, + ageSeconds: p.AgeSeconds, + }) + } + // Missing-ref problems (Pod→missing CM/Secret/PVC/SA, HPA→missing // target, Ingress→missing backend, PVC→missing SC, RoleBinding→missing // roleRef). 
Pod rows are intentionally kept here because the pod-error @@ -2271,13 +2295,8 @@ func handleIssuesTool(ctx context.Context, _ *mcp.CallToolRequest, input issuesI if err != nil { return nil, nil, err } - sources, err := issues.ParseSources(input.Source) - if err != nil { - return nil, nil, err - } filters := issues.Filters{ Severities: severities, - Sources: sources, Kinds: splitCSVStr(input.Kind), Limit: input.Limit, Namespaces: allowedNamespaces, @@ -2292,38 +2311,6 @@ func handleIssuesTool(ctx context.Context, _ *mcp.CallToolRequest, input issuesI } filters.Filter = f } - if input.Since != "" { - d, err := time.ParseDuration(input.Since) - if err != nil { - return nil, nil, fmt.Errorf("invalid since=%q: %w", input.Since, err) - } - if d < 0 { - return nil, nil, fmt.Errorf("since must be non-negative, got %s", d) - } - filters.Since = d - } - // Audit / event / kyverno collection is gated by IncludeX flags - // (default off). The MCP input doesn't surface separate include_* - // knobs, so listing one of those sources in `source` is the only - // way to enable the matching IncludeX. This means source= acts as - // BOTH a filter AND the collection trigger for noisy sources: - // source=kyverno enables Kyverno collection AND narrows results - // to just kyverno rows. To get "defaults plus Kyverno" over MCP, - // pass source=problem,condition,kyverno. Mirror the HTTP handler's - // 1h since-default when events are enabled with no explicit - // window, so an MCP caller doesn't silently inherit the full - // event-cache backlog. 
- for _, s := range filters.Sources { - switch s { - case issues.SourceEvent: - filters.IncludeEvents = true - case issues.SourceKyverno: - filters.IncludeKyverno = true - } - } - if filters.IncludeEvents && filters.Since == 0 { - filters.Since = time.Hour - } out, stats := issues.ComposeWithStats(provider, filters) resp := map[string]any{ "issues": out, @@ -2337,7 +2324,7 @@ func handleIssuesTool(ctx context.Context, _ *mcp.CallToolRequest, input issuesI // Steering hint when the issue list was capped. if stats.TotalMatched > len(out) { resp["narrowHint"] = fmt.Sprintf( - "returned %d of %d issues — narrow with namespace=, kind=, severity=critical, source= (e.g. problem,condition), since= (e.g. 15m), add filter= CEL, or raise limit (cap 1000)", + "returned %d of %d issues — narrow with namespace=, kind=, severity=critical, add filter= CEL, or raise limit (cap 1000)", len(out), stats.TotalMatched, ) } @@ -2350,20 +2337,6 @@ func handleIssuesTool(ctx context.Context, _ *mcp.CallToolRequest, input issuesI resp["filter_errors"] = stats.FilterErrors resp["filter_error_sample"] = stats.FilterErrorSample } - // Surface the Kyverno index lifecycle when the caller asked for it. - // Without this an empty kyverno list collapses four states - // (not_installed / deferred / warmup / ready-but-empty) into one, - // and the agent can't tell whether to fall back to a direct fetch - // or report "cluster has no violations" to the operator. Mirrors - // the HTTP /api/issues response shape. 
- if filters.IncludeKyverno { - meta, _ := resp["meta"].(map[string]any) - if meta == nil { - meta = map[string]any{} - } - meta["kyverno"] = provider.KyvernoStatus() - resp["meta"] = meta - } return toJSONResult(resp) } diff --git a/internal/mcp/tools_diagnose.go b/internal/mcp/tools_diagnose.go index 9aca703b7..462ad82af 100644 --- a/internal/mcp/tools_diagnose.go +++ b/internal/mcp/tools_diagnose.go @@ -46,8 +46,27 @@ type diagnoseResponse struct { LogsError string `json:"logsError,omitempty"` Events []aicontext.DeduplicatedEvent `json:"events,omitempty"` EventsError string `json:"eventsError,omitempty"` - Pods int `json:"pods"` - NarrowHint string `json:"narrowHint,omitempty"` + // StartupBlockers carries why the workload can't reach Running when that's + // the failure mode, spanning the whole pre-Running path: unschedulable pods + // (offending node constraint named), admission rejections (quota/ + // PodSecurity/webhook — where no Pod is created), or post-bind CNI/volume + // stalls. Empty when the workload starts fine. Named for the symptom + // ("can't start"), not the subsystem — "scheduling" alone would mislead, + // since it also covers admission and post-bind. + StartupBlockers []startupBlocker `json:"startupBlockers,omitempty"` + Pods int `json:"pods"` + NarrowHint string `json:"narrowHint,omitempty"` +} + +// startupBlocker is the compact row diagnose embeds for one reason a workload +// can't reach Running — the same signal the issues tool emits, scoped here to +// this workload (bind-time, admission, or post-bind). 
+type startupBlocker struct { + Kind string `json:"kind"` + Name string `json:"name"` + Reason string `json:"reason"` + Severity string `json:"severity"` + Message string `json:"message"` } // maxDiagnosePods caps the log fan-out so large DaemonSets / Deployments @@ -194,9 +213,67 @@ func handleDiagnose(ctx context.Context, _ *mcp.CallToolRequest, input diagnoseI if eventsErr != nil { resp.EventsError = eventsErr.Error() } + + resp.StartupBlockers = startupBlockersForWorkload(cache, kindNorm, input.Namespace, input.Name, pods) return toJSONResult(resp) } +// startupBlockersForWorkload runs the pre-Running detectors over the namespace +// and keeps the rows relevant to THIS workload: its own pods (bind-time / +// post-bind) and admission FailedCreate on the workload or its ReplicaSet. +// Namespace-scoped findings that aren't tied to this workload (the prior +// blanket "any ResourceQuota" case) are deliberately excluded — attaching a +// namespace's quota state to an unrelated workload over-attributes failures. +func startupBlockersForWorkload(cache *k8s.ResourceCache, kind, namespace, name string, pods []*corev1.Pod) []startupBlocker { + all := k8s.DetectSchedulingProblems(cache, namespace) + all = append(all, k8s.DetectAdmissionProblems(cache, namespace)...) + all = append(all, k8s.DetectPostBindProblems(cache, namespace)...) 
+ if len(all) == 0 { + return nil + } + + podNames := make(map[string]bool, len(pods)) + for _, p := range pods { + podNames[p.Name] = true + } + dispKind := normalizeDisplayKind(kind) + + var out []startupBlocker + for _, p := range all { + relevant := false + switch { + case p.Kind == "Pod" && podNames[p.Name]: + relevant = true + case p.Kind == dispKind && p.Name == name: + relevant = true // FailedCreate on the workload itself (StatefulSet/DaemonSet) + case dispKind == "Deployment" && p.Kind == "ReplicaSet" && isReplicaSetOf(p.Name, name): + relevant = true // FailedCreate on the Deployment's ReplicaSet + } + if !relevant { + continue + } + out = append(out, startupBlocker{ + Kind: p.Kind, + Name: p.Name, + Reason: p.Reason, + Severity: p.Severity, + Message: p.Message, + }) + } + return out +} + +// isReplicaSetOf reports whether rsName belongs to the given Deployment. +// Deployment ReplicaSets are named "<deployment-name>-<pod-template-hash>" with a +// single hyphen-free hash segment, so we require exactly one trailing segment +// after "<deployment-name>-". This avoids a prefix false-match against a sibling +// Deployment that merely shares the prefix (diagnosing "api" must not claim +// "api-gateway-<hash>", which belongs to Deployment "api-gateway"). +func isReplicaSetOf(rsName, deployName string) bool { + suffix, ok := strings.CutPrefix(rsName, deployName+"-") + return ok && suffix != "" && !strings.Contains(suffix, "-") +} + // normalizeDiagnoseKind accepts pod/deployment/statefulset/daemonset in any // singular/plural form and returns the plural cache form. Empty return means // unsupported.
Delegates to normalizeWorkloadKind for the workload kinds so diff --git a/internal/mcp/tools_diagnose_test.go b/internal/mcp/tools_diagnose_test.go index 53ea1ead6..b849a7cd1 100644 --- a/internal/mcp/tools_diagnose_test.go +++ b/internal/mcp/tools_diagnose_test.go @@ -217,3 +217,80 @@ func TestHandleDiagnose_DeploymentNotFound(t *testing.T) { t.Errorf("expected 'not found' error, got %v", err) } } + +// TestStartupBlockersForWorkload_ScopesToWorkload pins the relevance filter: +// a namespace-wide detector sweep must attach only rows belonging to the +// diagnosed workload. This commit changed the contract (dropped the blanket +// "any ResourceQuota" arm), so the scoping is the load-bearing logic that +// prevents over-attributing unrelated failures to a healthy workload. +func TestStartupBlockersForWorkload_ScopesToWorkload(t *testing.T) { + defer k8s.ResetTestState() + // Diagnosed Deployment "cart": its ReplicaSet is admission-blocked + // (created 0 of 2 pods, FailedCreate quota event) → must attach. + rs := &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{Name: "cart-abc123", Namespace: "alpha"}, + Spec: appsv1.ReplicaSetSpec{Replicas: ptrInt32(2)}, + Status: appsv1.ReplicaSetStatus{Replicas: 0}, + } + rsEvt := &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{Name: "e1", Namespace: "alpha"}, + InvolvedObject: corev1.ObjectReference{Kind: "ReplicaSet", Namespace: "alpha", Name: "cart-abc123"}, + Reason: "FailedCreate", + Type: corev1.EventTypeWarning, + Message: `Error creating: pods "x" is forbidden: exceeded quota: mem-quota, used: requests.memory=2Gi, limited: requests.memory=2Gi`, + LastTimestamp: metav1.Now(), + } + // An UNRELATED unschedulable pod in the same namespace → must NOT attach. 
+ otherPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "other-pod", Namespace: "alpha"}, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{{ + Type: corev1.PodScheduled, Status: corev1.ConditionFalse, + Reason: "Unschedulable", Message: "0/1 nodes are available", + }}, + }, + } + if err := k8s.InitTestResourceCache(fake.NewClientset(rs, rsEvt, otherPod)); err != nil { + t.Fatalf("InitTestResourceCache: %v", err) + } + t.Cleanup(func() { k8s.ResetTestState() }) + + // pods arg = cart's own pods (none created). The RS attaches via the + // ReplicaSet-of-Deployment match, not via pod-name. + out := startupBlockersForWorkload(k8s.GetResourceCache(), "deployments", "alpha", "cart", nil) + + var sawRS bool + for _, b := range out { + if b.Name == "other-pod" { + t.Errorf("unrelated unschedulable pod must not attach to cart's startupBlockers: %+v", b) + } + if b.Kind == "ReplicaSet" && b.Name == "cart-abc123" { + sawRS = true + } + } + if !sawRS { + t.Errorf("the diagnosed Deployment's blocked ReplicaSet should attach, got %+v", out) + } +} + +func ptrInt32(i int32) *int32 { return &i } + +func TestIsReplicaSetOf(t *testing.T) { + cases := []struct { + rs, deploy string + want bool + }{ + {"api-5d4f8b6c7", "api", true}, // real RS of "api" + {"my-app-5d4f8b6c7", "my-app", true}, // hyphenated Deployment name + {"api-gateway-5d4f8b6c7", "api", false}, // belongs to "api-gateway", not "api" + {"api", "api", false}, // no hash suffix + {"api-", "api", false}, // empty hash + {"other-abc", "api", false}, // unrelated + } + for _, c := range cases { + if got := isReplicaSetOf(c.rs, c.deploy); got != c.want { + t.Errorf("isReplicaSetOf(%q, %q) = %v, want %v", c.rs, c.deploy, got, c.want) + } + } +} diff --git a/internal/mcp/tools_filter_test.go b/internal/mcp/tools_filter_test.go index 996cb3840..d063b933e 100644 --- a/internal/mcp/tools_filter_test.go +++ b/internal/mcp/tools_filter_test.go @@ -11,7 +11,6 @@ import ( metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" - "github.com/skyhook-io/radar/internal/issues" "github.com/skyhook-io/radar/internal/k8s" pkgauth "github.com/skyhook-io/radar/pkg/auth" ) @@ -63,16 +62,6 @@ func setupFakeCacheForFilterTests(t *testing.T) { k8s.SetConnectionStatus(k8s.ConnectionStatus{State: k8s.StateConnected, Context: "fake-test"}) } -func TestParseSourceListRejectsAudit(t *testing.T) { - _, err := issues.ParseSources("audit") - if err == nil { - t.Fatal("issues.ParseSources(\"audit\") succeeded; want error") - } - if !strings.Contains(err.Error(), "get_cluster_audit") { - t.Fatalf("error did not point caller to get_cluster_audit: %v", err) - } -} - // withRestrictedUser primes the perm cache for a namespace-restricted user // (allowed = exactly the namespaces passed) and returns a context with that // user attached. Use nil/empty allowed for "denied to all" testing. diff --git a/internal/server/dashboard.go b/internal/server/dashboard.go index 68ff45b5d..5f0bc3dff 100644 --- a/internal/server/dashboard.go +++ b/internal/server/dashboard.go @@ -554,6 +554,17 @@ func (s *Server) getDashboardHealth(cache *k8s.ResourceCache, namespace string) pods, err = podLister.List(labels.Everything()) } } + // Pods the post-bind layer owns (stuck ContainerCreating on CNI/volume). + // Computed up front so the warning rollup below can skip them the same way + // it skips unschedulable pods — otherwise a long-Pending stuck pod gets + // both a bare "Pending" rollup row and the richer post-bind row. Keyed + // "namespace/name"; the slice is reused in the scheduling block below. 
+ postBind := k8s.DetectPostBindProblems(cache, namespace) + postBindPods := make(map[string]bool, len(postBind)) + for _, p := range postBind { + postBindPods[p.Namespace+"/"+p.Name] = true + } + // Group unhealthy pods by owner workload for rollup ownerGroups := make(map[ownerKey]*ownerGroup) var orphanProblems []DashboardProblem @@ -566,7 +577,13 @@ func (s *Server) getDashboardHealth(cache *k8s.ResourceCache, namespace string) health.Healthy++ case "warning": health.Warning++ - collectPodForRollup(pod, "medium", now, ownerGroups, &orphanProblems) + // Unschedulable pods (bind-time) and stuck-creating pods + // (post-bind) are owned by the scheduling rows appended below, + // which name the actual constraint; don't also roll them up + // here as a bare "Pending". + if !k8s.IsPodUnschedulable(pod) && !postBindPods[pod.Namespace+"/"+pod.Name] { + collectPodForRollup(pod, "medium", now, ownerGroups, &orphanProblems) + } case "error": health.Error++ collectPodForRollup(pod, "critical", now, ownerGroups, &orphanProblems) @@ -656,6 +673,31 @@ func (s *Server) getDashboardHealth(cache *k8s.ResourceCache, namespace string) }) } + // Scheduling problems: unschedulable pods (with the offending node + // constraint named), admission rejections (quota/PodSecurity/webhook — no + // Pod exists, so the pod rollup above can't see them), and post-bind + // CNI/volume stalls. Appended directly (not through the Missing-ref Pod + // filter above) — an Unschedulable row IS the pod's scheduling reason; the + // pod rollup above skips unschedulable + post-bind pods so they don't + // double-surface. postBind was computed above for that skip; reuse it. + sched := k8s.DetectSchedulingProblems(cache, namespace) + sched = append(sched, k8s.DetectAdmissionProblems(cache, namespace)...) + sched = append(sched, postBind...) 
+ for _, p := range sched { + problems = append(problems, DashboardProblem{ + Kind: p.Kind, + Namespace: p.Namespace, + Name: p.Name, + Severity: p.Severity, + Reason: p.Reason, + Message: p.Message, + Age: p.Age, + AgeSeconds: p.AgeSeconds, + Duration: p.Duration, + DurationSeconds: p.DurationSeconds, + }) + } + // CAPI problems (Cluster API resources) for _, p := range k8s.DetectCAPIProblems(k8s.GetDynamicResourceCache(), k8s.GetResourceDiscovery(), namespace) { problems = append(problems, DashboardProblem{ diff --git a/internal/server/issues_handler.go b/internal/server/issues_handler.go index 6ef26c27f..ca36444cc 100644 --- a/internal/server/issues_handler.go +++ b/internal/server/issues_handler.go @@ -4,7 +4,6 @@ import ( "fmt" "net/http" "strings" - "time" "github.com/skyhook-io/radar/internal/auth" "github.com/skyhook-io/radar/internal/filter" @@ -12,44 +11,22 @@ import ( "github.com/skyhook-io/radar/internal/k8s" ) -// handleIssues serves GET /api/issues — the unified cluster-health -// endpoint. Composes problems + condition fallback by default. Events -// and Kyverno are opt-in because both are loud — events flood with -// thousands of redundant rows on noisy clusters, and Kyverno -// PolicyReports add 10+ rows per workload under a baseline PSS profile. -// Static best-practice / posture findings are intentionally not an -// issues source; use /api/audit or MCP get_cluster_audit. +// handleIssues serves GET /api/issues — "what's broken right now." +// Composes the curated operational sources (workload/pod problems, +// dangling references, pod-startup blockers, and False CRD conditions), +// severity-ranked. Raw Warning events live at /api/events + the timeline; +// policy posture (Kyverno) and static best-practice findings live in +// /api/audit. 
Those are deliberately NOT issue sources — detection +// provenance is not a triage axis, so there is no source= filter (the +// `source` field is still on each returned row, and filter= CEL can slice +// on it for power users). // // Query params: // // namespace= / namespaces= one or comma-separated // severity= critical,warning (default: all) -// source= Comma-separated list of sources to RETURN. When set, -// only the listed sources appear in the response. -// Allowed: problem, missing_ref, event, condition, kyverno. -// Default (no source param): problem + missing_ref + -// condition (event + kyverno excluded because they can -// flood with noisy rows). missing_ref surfaces dangling- -// reference errors (Pod→missing PVC/CM/Secret/SA, HPA→ -// missing target, Ingress→missing backend, RoleBinding→ -// missing roleRef, webhook→missing Service). -// NOTE: source acts as a filter, not an additive opt-in. -// Passing source=kyverno returns ONLY Kyverno rows, not -// "defaults plus Kyverno". Use include_kyverno=true (or -// include_events) when you want -// "defaults plus X". -// include_events/include_kyverno=true -// Add the named source to the DEFAULT set without -// silencing the defaults. Effective filter: -// include_X=true is equivalent to source=problem, -// condition,X. These flags are also implicitly set when -// the matching source appears in source= so the warmup -// / collection path knows to fetch that source's data. // kind= Pod,Deployment,... (default: all) -// since= duration like 15m, 1h. Affects event source only; -// when events are enabled and since is omitted, the -// handler defaults to 1h to avoid pulling the full -// cached event backlog. 
+// filter= optional CEL predicate over each row (bindings include source) // limit= default 200, max 1000 func (s *Server) handleIssues(w http.ResponseWriter, r *http.Request) { if !s.requireConnected(w) { @@ -77,34 +54,11 @@ func (s *Server) handleIssues(w http.ResponseWriter, r *http.Request) { s.writeError(w, http.StatusBadRequest, err.Error()) return } - sources, err := issues.ParseSources(q.Get("source")) - if err != nil { - s.writeError(w, http.StatusBadRequest, err.Error()) - return - } - since, err := parseDuration(q.Get("since")) - if err != nil { - s.writeError(w, http.StatusBadRequest, err.Error()) - return - } - - includeEvents := q.Get("include_events") == "true" || hasSource(q.Get("source"), "event") - // When events are enabled and no explicit window was passed, cap - // the lookback at 1h. Without this an opt-in immediately yields - // the full cache window (hours of accumulated Warning events, - // most of which duplicate problem-source rows already returned). - if includeEvents && since == 0 { - since = time.Hour - } filters := issues.Filters{ - Namespaces: namespaces, - Severities: severities, - Sources: sources, - Kinds: splitCSV(q.Get("kind")), - Since: since, - Limit: parseLimit(q.Get("limit")), - IncludeEvents: includeEvents, - IncludeKyverno: q.Get("include_kyverno") == "true" || hasSource(q.Get("source"), "kyverno"), + Namespaces: namespaces, + Severities: severities, + Kinds: splitCSV(q.Get("kind")), + Limit: parseLimit(q.Get("limit")), CanReadClusterScoped: func(kind, group string) bool { if auth.UserFromContext(r.Context()) == nil { return true @@ -145,22 +99,6 @@ func (s *Server) handleIssues(w http.ResponseWriter, r *http.Request) { resp["filter_errors"] = stats.FilterErrors resp["filter_error_sample"] = stats.FilterErrorSample } - // When the caller asked for Kyverno findings (either via opt-in flag - // or source=kyverno), surface the index lifecycle phase under - // `meta.kyverno`. 
Without this, an empty list collapses four distinct - // states (not_installed / deferred / warmup / ready-but-empty) into - // one and the SPA + agents can't render the right copy. Emitted on - // every kyverno-touching request — agents can ignore it, but humans - // in the SPA get a clear "Kyverno not installed" vs "Indexing in - // progress" vs "No violations" distinction. - if filters.IncludeKyverno { - meta, _ := resp["meta"].(map[string]any) - if meta == nil { - meta = map[string]any{} - } - meta["kyverno"] = provider.KyvernoStatus() - resp["meta"] = meta - } s.writeJSON(w, resp) } @@ -186,19 +124,6 @@ func parseSeverities(v string) ([]issues.Severity, error) { return out, nil } -// hasSource reports whether the caller's `?source=` list explicitly -// names `target`. Used to derive the opt-in flags for event / Kyverno -// sources — passing them in the source list is more -// discoverable than the parallel include_* booleans, and we honor both. -func hasSource(v, target string) bool { - for _, p := range strings.Split(v, ",") { - if strings.EqualFold(strings.TrimSpace(p), target) { - return true - } - } - return false -} - func splitCSV(v string) []string { if v == "" { return nil @@ -212,17 +137,3 @@ func splitCSV(v string) []string { } return out } - -func parseDuration(v string) (time.Duration, error) { - if v == "" { - return 0, nil - } - d, err := time.ParseDuration(v) - if err != nil { - return 0, fmt.Errorf("invalid since=%q: %w", v, err) - } - if d < 0 { - return 0, fmt.Errorf("since must be non-negative, got %s", d) - } - return d, nil -} diff --git a/internal/server/issues_handler_test.go b/internal/server/issues_handler_test.go deleted file mode 100644 index bdb68e27a..000000000 --- a/internal/server/issues_handler_test.go +++ /dev/null @@ -1,180 +0,0 @@ -package server - -import ( - "encoding/json" - "io" - "net/http" - "strings" - "testing" - - "github.com/skyhook-io/radar/internal/k8s" -) - -// TestIssuesHandler_KyvernoMetaEmittedOnOptIn pins the 
meta.kyverno field -// emission on /api/issues when source=kyverno (or include_kyverno=true) -// is requested. Without this, a Kyverno-aware caller can't distinguish -// "Kyverno not installed" from "warmup deferred" from "ready but no -// violations" — they all look like an empty issues list. -// -// We exercise three of the four states by manipulating the package-level -// warmup-decision atomic directly. The fourth state ("warmup") is the -// implicit default before any decision is recorded; we cover the -// transitions in the policy_reports_test.go state-machine test. -func TestIssuesHandler_KyvernoMetaEmittedOnOptIn(t *testing.T) { - // Snapshot + restore the kyverno globals so we don't bleed into - // other server tests running against the same testServer singleton. - origDecision := loadKyvernoDecisionForTest() - origIdx := loadKyvernoIndexForTest() - t.Cleanup(func() { - storeKyvernoDecisionForTest(k8s.KyvernoStatus(origDecision)) - storeKyvernoIndexForTest(origIdx) - }) - - cases := []struct { - name string - setup func() - wantMeta string - queryParam string // "include_kyverno=true" or "source=kyverno" - }{ - { - name: "not_installed surfaces in meta.kyverno", - setup: func() { - storeKyvernoIndexForTest(nil) - storeKyvernoDecisionForTest(k8s.KyvernoStatusNotInstalled) - }, - wantMeta: "not_installed", - queryParam: "include_kyverno=true", - }, - { - name: "deferred surfaces in meta.kyverno", - setup: func() { - storeKyvernoIndexForTest(nil) - storeKyvernoDecisionForTest(k8s.KyvernoStatusDeferred) - }, - wantMeta: "deferred", - queryParam: "source=kyverno", - }, - { - name: "ready surfaces in meta.kyverno (empty findings list is meaningful)", - setup: func() { - // Real index instance, no findings populated → ready but empty. 
- storeKyvernoIndexForTest(newEmptyIndexForTest()) - storeKyvernoDecisionForTest(k8s.KyvernoStatusReady) - }, - wantMeta: "ready", - queryParam: "include_kyverno=true", - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - tc.setup() - - resp, err := http.Get(testServer.URL + "/api/issues?" + tc.queryParam) - if err != nil { - t.Fatalf("GET /api/issues: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Fatalf("status: got %d want 200", resp.StatusCode) - } - - var body map[string]any - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode: %v", err) - } - - meta, ok := body["meta"].(map[string]any) - if !ok { - t.Fatalf("response missing meta object: %+v", body) - } - gotKyv, _ := meta["kyverno"].(string) - if gotKyv != tc.wantMeta { - t.Errorf("meta.kyverno: got %q want %q (full body: %+v)", gotKyv, tc.wantMeta, body) - } - }) - } -} - -// TestIssuesHandler_KyvernoMetaOmittedWhenNotRequested pins the inverse: -// when the caller did NOT ask for Kyverno (no source=kyverno, no -// include_kyverno), we don't emit meta.kyverno. This keeps default -// responses lean — agents not aware of Kyverno don't get a noisy field, -// and the SPA's default issue view stays clean. -func TestIssuesHandler_KyvernoMetaOmittedWhenNotRequested(t *testing.T) { - origDecision := loadKyvernoDecisionForTest() - origIdx := loadKyvernoIndexForTest() - t.Cleanup(func() { - storeKyvernoDecisionForTest(k8s.KyvernoStatus(origDecision)) - storeKyvernoIndexForTest(origIdx) - }) - // Even if Kyverno is "ready", omitted from response when caller - // didn't request it. 
- storeKyvernoIndexForTest(newEmptyIndexForTest()) - storeKyvernoDecisionForTest(k8s.KyvernoStatusReady) - - resp, err := http.Get(testServer.URL + "/api/issues") - if err != nil { - t.Fatalf("GET /api/issues: %v", err) - } - defer resp.Body.Close() - - var body map[string]any - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode: %v", err) - } - if _, ok := body["meta"]; ok { - t.Errorf("default request should not emit meta.kyverno; got %+v", body["meta"]) - } -} - -func TestIssuesHandlerRejectsAuditSource(t *testing.T) { - resp, err := http.Get(testServer.URL + "/api/issues?source=audit") - if err != nil { - t.Fatalf("GET /api/issues: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusBadRequest { - t.Fatalf("status: got %d want 400", resp.StatusCode) - } - body, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatalf("read body: %v", err) - } - if !strings.Contains(string(body), "use GET /api/audit") { - t.Fatalf("error did not point caller to /api/audit: %q", string(body)) - } -} - -// --- Test helpers: cross-package access to k8s package state via exported -// hooks. We use go:linkname-style accessors registered as test-only helpers -// in the k8s package; here we route through the public API where possible -// and otherwise via the package's own globals through a small bridge. -// -// We can't `go:linkname` into internal/k8s from another package easily -// without a forward declaration, so we route through small exported -// helpers in the k8s package's _test.go side. Since k8s already exposes -// ResetPolicyReportIndex publicly, we reuse that for resetting; for -// arbitrary state injection (needed here) we add bridge funcs in the -// k8s package guarded by a build tag-free in-package test file (see -// policy_reports_test_export_test.go-equivalent below). - -// loadKyvernoDecisionForTest / store / loadIndex / store / newEmptyIndex -// are thin wrappers around exported test hooks in internal/k8s. 
- -func loadKyvernoDecisionForTest() string { - return string(k8s.LoadKyvernoDecisionForTest()) -} -func storeKyvernoDecisionForTest(s k8s.KyvernoStatus) { - k8s.StoreKyvernoDecisionForTest(s) -} -func loadKyvernoIndexForTest() any { - return k8s.LoadKyvernoIndexForTest() -} -func storeKyvernoIndexForTest(v any) { - k8s.StoreKyvernoIndexForTest(v) -} -func newEmptyIndexForTest() any { - return k8s.NewEmptyKyvernoIndexForTest() -} diff --git a/internal/server/server.go b/internal/server/server.go index 6e9322e1a..1afda2dc7 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -1472,6 +1472,17 @@ func (s *Server) handleListResources(w http.ResponseWriter, r *http.Request) { return cache.LimitRanges().LimitRanges(ns).List(labels.Everything()) }, ) + case "resourcequotas": + if cache.ResourceQuotas() == nil { + notReadyOrForbidden("resourcequotas") + return + } + result, err = listPerNs( + func() (any, error) { return cache.ResourceQuotas().List(labels.Everything()) }, + func(ns string) (any, error) { + return cache.ResourceQuotas().ResourceQuotas(ns).List(labels.Everything()) + }, + ) case "networkpolicies", "netpol": if cache.NetworkPolicies() == nil { notReadyOrForbidden("networkpolicies") @@ -1815,6 +1826,12 @@ func (s *Server) handleGetResource(w http.ResponseWriter, r *http.Request) { return } resource, err = cache.LimitRanges().LimitRanges(namespace).Get(name) + case "resourcequotas", "resourcequota": + if cache.ResourceQuotas() == nil { + notReadyOrForbiddenGet("resourcequotas") + return + } + resource, err = cache.ResourceQuotas().ResourceQuotas(namespace).Get(name) case "roles", "role": if cache.Roles() == nil { forbiddenGet("roles") diff --git a/internal/summarycontext/summarycontext.go b/internal/summarycontext/summarycontext.go index e375e6ab2..3f52727c3 100644 --- a/internal/summarycontext/summarycontext.go +++ b/internal/summarycontext/summarycontext.go @@ -171,11 +171,11 @@ func CanonicalSingular(kind string) string { // bucketing 
for a per-resource lookup, not paginating — the caller of // the builder never sees the issue list itself. // -// We rely on Filters.IncludeAudit and Filters.IncludeEvents staying -// false-by-default — that's what keeps the per-row count to "problem" -// + "condition" only. Audit + Warning events are loud and require -// explicit opt-in; rolling them into the per-row count would distort -// "this Pod has 1 issue" for the common case. +// The per-row count reflects exactly the curated operational sources +// Compose runs (problem + missing_ref + scheduling + condition). Loud +// adjacent signals — raw Warning events and policy/audit posture — are +// not issue sources at all, so they can't distort "this Pod has 1 issue" +// for the common case. // // No Kinds filter on Compose: the index buckets every composed row by // (group, kind, ns, name), and the per-row lookup keys off diff --git a/internal/summarycontext/summarycontext_test.go b/internal/summarycontext/summarycontext_test.go index aa23ac50d..9edbbf2e6 100644 --- a/internal/summarycontext/summarycontext_test.go +++ b/internal/summarycontext/summarycontext_test.go @@ -55,6 +55,7 @@ func (f *fakeIssuesProvider) DetectProblems(namespaces []string) []k8s.Problem { } func (f *fakeIssuesProvider) DetectCAPIProblems(_ []string) []k8s.Problem { return nil } func (f *fakeIssuesProvider) DetectMissingRefs(_ []string) []k8s.Problem { return nil } +func (f *fakeIssuesProvider) DetectScheduling(_ []string) []k8s.Problem { return nil } func (f *fakeIssuesProvider) WarningEvents(_ []string, _ time.Duration) []*corev1.Event { return nil } diff --git a/packages/k8s-ui/src/components/resources/renderers/NamespaceRenderer.tsx b/packages/k8s-ui/src/components/resources/renderers/NamespaceRenderer.tsx index bc8008134..85e2aee6a 100644 --- a/packages/k8s-ui/src/components/resources/renderers/NamespaceRenderer.tsx +++ b/packages/k8s-ui/src/components/resources/renderers/NamespaceRenderer.tsx @@ -1,8 +1,10 @@ -import { Shield, Box, 
Users } from 'lucide-react' +import { Shield, Box, Users, Gauge } from 'lucide-react' import { clsx } from 'clsx' import { Section, PropertyList, Property, ResourceLink } from '../../ui/drawer-components' import type { RBACNamespaceResponse, RBACBindingWithSubjects, RBACSubject, ResourceRef } from '../../../types' import { rbacKindBadgeClass } from '../../../utils/rbac-badges' +import { SEVERITY_TEXT, SEVERITY_DOT } from '../../../utils/badge-colors' +import { parseCPUToNanocores, parseMemoryToBytes } from '../../../utils/format' interface NamespaceRendererProps { data: any @@ -14,10 +16,23 @@ interface NamespaceRendererProps { rbacData?: RBACNamespaceResponse | null rbacLoading?: boolean rbacError?: Error | null + /** + * ResourceQuota objects for this namespace (from /api/resources/ + * resourcequotas?namespace=). Undefined when the host hasn't wired the + * fetch (quota section omitted). A saturated quota is exactly why a + * namespace stops admitting pods, yet it's shown nowhere else. + */ + quotaData?: any[] | null + /** + * Non-403 quota fetch error. When set, the quota section renders a note + * instead of silently disappearing — so a quota-constrained namespace whose + * fetch 500/503s isn't mistaken for quota-free. (403 stays hidden upstream.) + */ + quotaError?: Error | null onNavigate?: (ref: ResourceRef) => void } -export function NamespaceRenderer({ data, rbacData, rbacLoading, rbacError, onNavigate }: NamespaceRendererProps) { +export function NamespaceRenderer({ data, rbacData, rbacLoading, rbacError, quotaData, quotaError, onNavigate }: NamespaceRendererProps) { const metadata = data.metadata || {} const status = data.status || {} const phase = status.phase @@ -48,6 +63,11 @@ export function NamespaceRenderer({ data, rbacData, rbacLoading, rbacError, onNa + {/* ResourceQuota usage — only when host wired the fetch. */} + {(quotaError || (quotaData != null && quotaData.length > 0)) && ( + + )} + {/* RBAC summary — only when host wired the fetch. 
*/} {rbacData !== undefined && ( parseFloat(v) || 0 + const h = parse(hard) + if (!h) return null + return parse(used || '0') / h +} + +function NamespaceQuotaSection({ quotas, error }: { quotas: any[]; error?: Error | null }) { + return ( +
+ {error && ( +
+ Couldn’t load resource quotas — retry shortly. A quota at its limit blocks new pods in this namespace. +
+ )} +
+ {quotas.map((q: any, qi: number) => { + const name = q?.metadata?.name ?? `quota-${qi}` + const hard: Record = q?.status?.hard ?? q?.spec?.hard ?? {} + const used: Record = q?.status?.used ?? {} + const resourceNames = Object.keys(hard).sort() + return ( +
+
{name}
+ {resourceNames.length === 0 ? ( +
No hard limits set.
+ ) : ( +
+ {resourceNames.map((res) => { + const ratio = quotaUsageRatio(res, used[res] ?? '0', hard[res]) + const pct = ratio === null ? null : Math.min(100, Math.round(ratio * 100)) + const tone = + ratio === null ? SEVERITY_TEXT.neutral + : ratio >= 1 ? SEVERITY_TEXT.error + : ratio >= 0.9 ? SEVERITY_TEXT.alert + : SEVERITY_TEXT.neutral + const barTone = + ratio === null ? 'bg-theme-border' + : ratio >= 1 ? SEVERITY_DOT.error + : ratio >= 0.9 ? SEVERITY_DOT.alert + : 'bg-theme-text-tertiary' + return ( +
+
+ {res} + + {used[res] ?? '0'} / {hard[res]}{pct !== null && ` (${pct}%)`} + +
+ {pct !== null && ( +
+
+
+ )} +
+ ) + })} +
+ )} +
+ ) + })} +
+
+ ) +} + // ============================================================================ // NAMESPACE RBAC SECTION // ============================================================================ diff --git a/packages/k8s-ui/src/components/resources/renderers/PodRenderer.tsx b/packages/k8s-ui/src/components/resources/renderers/PodRenderer.tsx index 929929a58..42286008b 100644 --- a/packages/k8s-ui/src/components/resources/renderers/PodRenderer.tsx +++ b/packages/k8s-ui/src/components/resources/renderers/PodRenderer.tsx @@ -338,9 +338,12 @@ export function PodRenderer({
    {podProblems.map((p, i) => ( -
  • - - {p.message} +
  • + + + {p.message} + {p.detail && : {p.detail}} +
  • ))}
diff --git a/packages/k8s-ui/src/components/resources/resource-utils.ts b/packages/k8s-ui/src/components/resources/resource-utils.ts index af9d0c2b4..e9fda3bb8 100644 --- a/packages/k8s-ui/src/components/resources/resource-utils.ts +++ b/packages/k8s-ui/src/components/resources/resource-utils.ts @@ -48,6 +48,27 @@ export const healthColors: Record = { export interface PodProblem { severity: 'critical' | 'high' | 'medium' message: string + // detail carries extra human context shown after the short message (e.g. + // the scheduler's verdict for an Unschedulable pod). message stays the + // stable short label so filter-chip matching (podMatchesProblemCategory) + // and known-pattern checks keep working on exact strings. + detail?: string +} + +/** + * Condense a kube-scheduler verdict (the PodScheduled=False / FailedScheduling + * message) for display: drop the "0/N nodes are available:" prefix and the + * "preemption: …" tail, keeping the per-predicate clause list — which already + * names untolerated taints, insufficient resources, and affinity/selector + * misses. Presentation-only; the backend `scheduling` issue source does the + * structured decomposition + node-label resolution (e.g. naming arm64). + */ +export function summarizeSchedulerMessage(message?: string): string { + if (!message) return '' + let m = message.split('. 
preemption:')[0].split(' preemption:')[0].trim() + const colon = m.indexOf(':') + if (colon >= 0) m = m.slice(colon + 1).trim() + return m.replace(/\.\s*$/, '').trim() } /** Tailwind classes for severity dot indicators (used in tooltips and alert banners) */ @@ -302,7 +323,7 @@ export function getPodProblems(pod: any): PodProblem[] { for (const cond of conditions) { if (cond.type === 'PodScheduled' && cond.status === 'False') { if (cond.reason === 'Unschedulable') { - problems.push({ severity: 'high', message: 'Unschedulable' }) + problems.push({ severity: 'high', message: 'Unschedulable', detail: summarizeSchedulerMessage(cond.message) || undefined }) } } // Readiness/Liveness probe failures diff --git a/packages/k8s-ui/src/components/resources/summarize-scheduler-message.test.ts b/packages/k8s-ui/src/components/resources/summarize-scheduler-message.test.ts new file mode 100644 index 000000000..ec70f835c --- /dev/null +++ b/packages/k8s-ui/src/components/resources/summarize-scheduler-message.test.ts @@ -0,0 +1,30 @@ +import { describe, it, expect } from 'vitest' +import { summarizeSchedulerMessage } from './resource-utils' + +describe('summarizeSchedulerMessage', () => { + it('strips the "0/N nodes are available:" prefix and the preemption tail', () => { + const msg = + '0/5 nodes are available: 2 Insufficient cpu, 3 node(s) had untolerated taint {dedicated: gpu}. ' + + 'preemption: 0/5 nodes are available: 5 No preemption victims found for incoming pod.' 
+ expect(summarizeSchedulerMessage(msg)).toBe( + '2 Insufficient cpu, 3 node(s) had untolerated taint {dedicated: gpu}', + ) + }) + + it('returns the clause list without a node prefix unchanged (minus trailing period)', () => { + expect(summarizeSchedulerMessage('0/2 nodes are available: 2 Insufficient memory.')).toBe( + '2 Insufficient memory', + ) + }) + + it('handles the bare " preemption:" tail variant', () => { + expect( + summarizeSchedulerMessage('0/3 nodes are available: 3 Insufficient cpu preemption: not helpful'), + ).toBe('3 Insufficient cpu') + }) + + it('returns empty string for empty/undefined input (so detail is omitted, message stays the stable label)', () => { + expect(summarizeSchedulerMessage('')).toBe('') + expect(summarizeSchedulerMessage(undefined)).toBe('') + }) +}) diff --git a/packages/k8s-ui/src/components/topology/K8sResourceNode.tsx b/packages/k8s-ui/src/components/topology/K8sResourceNode.tsx index 2a45d734a..8f5333a14 100644 --- a/packages/k8s-ui/src/components/topology/K8sResourceNode.tsx +++ b/packages/k8s-ui/src/components/topology/K8sResourceNode.tsx @@ -43,13 +43,63 @@ function getIssueTooltip(issue: string | undefined): React.ReactNode { Pending: { title: 'Pending', description: 'Pod is waiting to be scheduled to a node.', - action: 'Check for resource constraints or node availability.', + action: 'Open the pod to see the scheduler verdict (taints, resources, affinity).', }, FailedScheduling: { title: 'Scheduling Failed', description: 'No suitable node found for this pod.', action: 'Check node resources, taints, tolerations, and affinity rules.', }, + Unschedulable: { + title: 'Unschedulable', + description: 'The scheduler tried every node and none fit.', + action: 'Open the pod for the decomposed reason — arch/OS mismatch, untolerated taint, insufficient resources, or affinity.', + }, + QuotaExceeded: { + title: 'ResourceQuota Exceeded', + description: 'A namespace ResourceQuota is at its hard limit, so new pods are rejected at 
admission.', + action: 'Open the namespace to see quota usage; raise the quota or free usage.', + }, + QuotaNearLimit: { + title: 'ResourceQuota Near Limit', + description: 'A namespace ResourceQuota is close to its hard limit and will soon block new pods.', + action: 'Open the namespace to see quota usage.', + }, + IPExhaustion: { + title: 'IP Exhaustion (CNI)', + description: 'The pod was scheduled but the CNI could not assign an IP — the node/subnet pool is exhausted.', + action: 'Free IPs, scale the subnet/ENI pool, or move the pod to a node with capacity.', + }, + SandboxCreationFailed: { + title: 'Sandbox Creation Failed', + description: 'The kubelet could not create the pod sandbox.', + action: 'Check kubelet/CNI events on the node.', + }, + VolumeMount: { + title: 'Volume Mount Failed', + description: 'The pod was scheduled but a volume could not be mounted.', + action: 'Check the PVC/PV binding and the CSI driver on the node.', + }, + VolumeAttach: { + title: 'Volume Attach Failed', + description: 'A volume could not be attached to the node.', + action: 'Check the CSI driver and cloud-provider attach limits.', + }, + VolumeMultiAttach: { + title: 'Volume Multi-Attach', + description: 'The volume is still attached to another node — a RWO volume cannot attach in two places.', + action: 'Wait for the old pod to terminate, or cordon/drain the stale node.', + }, + PodSecurityViolation: { + title: 'Pod Security Violation', + description: 'Pod Security Admission rejected the pod template at admission.', + action: 'Align the pod securityContext with the namespace PSA level.', + }, + WebhookDenied: { + title: 'Admission Webhook Denied', + description: 'A validating/mutating admission webhook rejected pod creation.', + action: 'Check the webhook policy that denied the request.', + }, Evicted: { title: 'Pod Evicted', description: 'Pod was evicted from the node (usually due to resource pressure).', diff --git a/pkg/k8score/cache.go b/pkg/k8score/cache.go index 
4bfa0f9f6..5a0278e1c 100644 --- a/pkg/k8score/cache.go +++ b/pkg/k8score/cache.go @@ -938,6 +938,9 @@ func buildInformerSetups() []informerSetup { mk(LimitRanges, "LimitRange", false, false, func(f informers.SharedInformerFactory) cache.SharedIndexInformer { return f.Core().V1().LimitRanges().Informer() }), + mk(ResourceQuotas, "ResourceQuota", false, false, func(f informers.SharedInformerFactory) cache.SharedIndexInformer { + return f.Core().V1().ResourceQuotas().Informer() + }), } } @@ -1404,6 +1407,7 @@ var allKindListers = []kindLister{ {"RoleBinding", "rbac.authorization.k8s.io", func(rc *ResourceCache) any { return rc.RoleBindings() }}, {"ClusterRoleBinding", "rbac.authorization.k8s.io", func(rc *ResourceCache) any { return rc.ClusterRoleBindings() }}, {"LimitRange", "", func(rc *ResourceCache) any { return rc.LimitRanges() }}, + {"ResourceQuota", "", func(rc *ResourceCache) any { return rc.ResourceQuotas() }}, } // AllKindListers returns the table of all resource kinds with their group and lister. diff --git a/pkg/k8score/listers.go b/pkg/k8score/listers.go index 564fd6af0..44877fd2c 100644 --- a/pkg/k8score/listers.go +++ b/pkg/k8score/listers.go @@ -220,6 +220,13 @@ func (rc *ResourceCache) LimitRanges() listerscorev1.LimitRangeLister { return rc.factoryFor(LimitRanges).Core().V1().LimitRanges().Lister() } +func (rc *ResourceCache) ResourceQuotas() listerscorev1.ResourceQuotaLister { + if rc == nil || !rc.isEnabled(ResourceQuotas) { + return nil + } + return rc.factoryFor(ResourceQuotas).Core().V1().ResourceQuotas().Lister() +} + // listCountNamespaced counts items from a lister filtered to specific namespaces. // If namespaces is empty, it returns the total count (same as listCount). 
func listCountNamespaced(lister any, namespaces []string) int { @@ -277,6 +284,9 @@ func listCountInNamespace(lister any, ns string) int { case listerscorev1.LimitRangeLister: items, _ := l.LimitRanges(ns).List(labels.Everything()) return len(items) + case listerscorev1.ResourceQuotaLister: + items, _ := l.ResourceQuotas(ns).List(labels.Everything()) + return len(items) case listersrbacv1.RoleLister: items, _ := l.Roles(ns).List(labels.Everything()) return len(items) @@ -361,6 +371,9 @@ func listCount(lister any) int { case listerscorev1.LimitRangeLister: items, _ := l.List(labels.Everything()) return len(items) + case listerscorev1.ResourceQuotaLister: + items, _ := l.List(labels.Everything()) + return len(items) case listersrbacv1.RoleLister: items, _ := l.List(labels.Everything()) return len(items) diff --git a/pkg/k8score/types.go b/pkg/k8score/types.go index a525b7df6..c1e55419b 100644 --- a/pkg/k8score/types.go +++ b/pkg/k8score/types.go @@ -47,6 +47,7 @@ const ( RoleBindings ResourceType = "rolebindings" ClusterRoleBindings ResourceType = "clusterrolebindings" LimitRanges ResourceType = "limitranges" + ResourceQuotas ResourceType = "resourcequotas" ) // Operation constants for resource change events. diff --git a/web/src/api/quotas.ts b/web/src/api/quotas.ts new file mode 100644 index 000000000..e441bd85b --- /dev/null +++ b/web/src/api/quotas.ts @@ -0,0 +1,16 @@ +import { useQuery } from '@tanstack/react-query' +import { fetchJSON } from './client' + +// useNamespaceQuotas fetches a namespace's ResourceQuota objects via +// /api/resources/resourcequotas?namespace= (a bare array). Backs the +// NamespaceRenderer quota-usage section — quota saturation is otherwise +// surfaced nowhere in the UI, yet it's exactly why a namespace stops +// admitting new pods. 
+export function useNamespaceQuotas(namespace: string, enabled = true) { + return useQuery({ + queryKey: ['resourcequotas', namespace], + queryFn: () => fetchJSON(`/resources/resourcequotas?namespace=${encodeURIComponent(namespace)}`), + enabled: enabled && !!namespace, + staleTime: 15000, + }) +} diff --git a/web/src/components/resources/renderers/NamespaceRenderer.tsx b/web/src/components/resources/renderers/NamespaceRenderer.tsx index 70f22c4fd..ece95958a 100644 --- a/web/src/components/resources/renderers/NamespaceRenderer.tsx +++ b/web/src/components/resources/renderers/NamespaceRenderer.tsx @@ -1,6 +1,8 @@ import { NamespaceRenderer as BaseNamespaceRenderer } from '@skyhook-io/k8s-ui/components/resources/renderers/NamespaceRenderer' import type { ResourceRef } from '@skyhook-io/k8s-ui' import { useRBACNamespace } from '../../../api/rbac' +import { useNamespaceQuotas } from '../../../api/quotas' +import { isForbiddenError } from '../../../api/client' interface NamespaceRendererProps { data: any @@ -10,12 +12,19 @@ interface NamespaceRendererProps { export function NamespaceRenderer({ data, onNavigate }: NamespaceRendererProps) { const name = data?.metadata?.name ?? '' const { data: rbacData, isLoading, error } = useRBACNamespace(name, !!name) + const { data: quotaData, error: quotaError } = useNamespaceQuotas(name, !!name) + // 403 → the user can't see quotas; hide the section (same posture as the + // RBAC sections). Surface other errors (500/503) so a quota-constrained + // namespace doesn't silently render as quota-free. + const quotaErr = quotaError && !isForbiddenError(quotaError) ? (quotaError as Error) : null return ( )