skyhook-io · nadaverell · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
@@ -198,7 +198,7 @@ Pod **Permissions** is the differentiator — frames the SA's grant as blast rad
 
 ### MCP Server
 
-Stateless HTTP at `/mcp` (JSON-RPC). Read tools use `readOnlyHint`, write tools use `destructiveHint: false`. Respects cluster RBAC (impersonates via `DynamicClientFromContext` for write/exec/logs). Enabled by default; `--no-mcp` to disable. Tool catalogue + design rationale lives in `internal/mcp/tools.go` + [docs/mcp.md](docs/mcp.md) — don't restate it here.
+Stateless HTTP at `/mcp` (JSON-RPC). Read tools use `readOnlyHint`, write tools use `destructiveHint: true`. Respects cluster RBAC (impersonates via `DynamicClientFromContext` for write/exec/logs). Enabled by default; `--no-mcp` to disable. Tool catalogue + design rationale lives in `internal/mcp/tools.go` + [docs/mcp.md](docs/mcp.md) — don't restate it here.
 
 ### Error Handling (Backend)
 

@@ -352,13 +352,13 @@ No auth by default (local use). See the **[Authentication Guide](docs/authentica
 
 Radar auto-discovers any CRD in your cluster. Popular tools get [dedicated integrations](docs/integrations.md) with topology edges, detail views, and AI summaries.
 
-**Default chart RBAC** covers the built-in Kubernetes kinds listed below — Workloads, Networking (including NetworkPolicies and PodDisruptionBudgets), Configuration, Storage (PersistentVolumes, PersistentVolumeClaims, StorageClasses), HorizontalPodAutoscalers, ServiceAccounts, LimitRanges, Nodes, Namespaces, and Events. RBAC objects (Roles, ClusterRoles, RoleBindings, ClusterRoleBindings) are opt-in via `rbac.viewRBAC=true`. **CRD-based integrations** (Gateway API, VerticalPodAutoscaler, ArgoCD, FluxCD, cert-manager, etc.) need both the CRD installed in your cluster *and* read access granted — most groups are default-on under `rbac.crdGroups.<name>` (e.g. `gatewayApi`, `verticalPodAutoscaler`); check `values.yaml` or add custom rules via `rbac.additionalRules`.
+**Default chart RBAC** covers the built-in Kubernetes kinds listed below — Workloads, Networking (including NetworkPolicies and PodDisruptionBudgets), Configuration, Storage (PersistentVolumes, PersistentVolumeClaims, StorageClasses), HorizontalPodAutoscalers, ServiceAccounts, LimitRanges, ResourceQuotas, Nodes, Namespaces, and Events. RBAC objects (Roles, ClusterRoles, RoleBindings, ClusterRoleBindings) are opt-in via `rbac.viewRBAC=true`. **CRD-based integrations** (Gateway API, VerticalPodAutoscaler, ArgoCD, FluxCD, cert-manager, etc.) need both the CRD installed in your cluster *and* read access granted — most groups are default-on under `rbac.crdGroups.<name>` (e.g. `gatewayApi`, `verticalPodAutoscaler`); check `values.yaml` or add custom rules via `rbac.additionalRules`.
 
 | Category | Resources |
 |----------|-----------|
 | **Workloads** | Deployments, DaemonSets, StatefulSets, ReplicaSets, Pods, Jobs, CronJobs |
 | **Networking** | Services, Ingresses, NetworkPolicies, Endpoints, PodDisruptionBudgets |
-| **Configuration** | ConfigMaps, Secrets (names only, values hidden) |
+| **Configuration** | ConfigMaps, Secrets (names only, values hidden), LimitRanges, ResourceQuotas |
 | **Storage** | PersistentVolumeClaims, PersistentVolumes, StorageClasses |
 | **Autoscaling** | HorizontalPodAutoscalers, VerticalPodAutoscalers |
 | **Cluster** | Nodes, Namespaces, ServiceAccounts, Events |

@@ -22,6 +22,7 @@ rules:
       - serviceaccounts
       - endpoints
       - limitranges
+      - resourcequotas
     verbs: ["get", "list", "watch"]
   - apiGroups:
       - policy

@@ -213,7 +213,7 @@ rules:
   - apiGroups: ["", "apps", "batch", "networking.k8s.io"]
     resources: ["pods", "services", "deployments", "daemonsets", "statefulsets",
                 "replicasets", "jobs", "cronjobs", "configmaps", "events",
-                "ingresses", "persistentvolumeclaims"]
+                "ingresses", "persistentvolumeclaims", "resourcequotas"]
     verbs: ["get", "list", "watch"]
   - apiGroups: [""]
     resources: ["pods/log"]

@@ -878,7 +878,7 @@ Deferred to a future "full Crossplane" pass:
 | PolicyReport | `wgpolicyk8s.io/v1alpha2` | — | Yes | Yes |
 | ClusterPolicyReport | `wgpolicyk8s.io/v1alpha2` | — | Yes | Yes |
 
-PolicyReport findings also surface through the unified `/api/issues` endpoint (and the MCP `issues` tool) when opted in via `source=kyverno` / `include_kyverno=true` — `fail` and `error` results map to `critical`, `warn` maps to `warning`, and `pass` / `skip` are omitted.
+PolicyReport findings are policy posture, not live operational failure, so they are **not** part of the `/api/issues` stream. They surface per-resource: the PolicyReport detail view (above) and the `resourceContext` policy rollup on a resource fetched via `get_resource`. (The cluster audit — `/api/audit` + MCP `get_cluster_audit` — is Radar's own static best-practice scanner and does **not** include PolicyReport results.)
 
 ---
 

@@ -16,7 +16,7 @@ Radar's MCP server solves these:
 
 - **Token-optimized** — resources are minified, stripping noise (managed fields, internal annotations, redundant status) while preserving what matters
 - **Enriched data** — topology graphs, health assessments, deduplicated events, filtered logs (prioritizing errors/warnings)
-- **Safe operations** — read tools are read-only; write tools (restart, scale, sync) are clearly annotated and non-destructive
+- **Safe operations** — read tools are read-only (`readOnlyHint`); write tools (restart, scale, rollback, sync, apply, cordon/drain) are RBAC-enforced and annotated `destructiveHint` so AI clients can prompt for confirmation
 - **Secret-safe** — Secret data is never exposed, environment values are redacted, log output is scrubbed for API keys and tokens
 - **RBAC-aware** — respects your cluster's RBAC permissions
 - **Vendor-neutral** — works with any MCP-compatible AI tool
@@ -165,17 +165,24 @@ Add to `~/.gemini/settings.json`:
 
 | Tool | Description | Parameters |
 |------|-------------|------------|
-| `get_dashboard` | Cluster health overview — resource counts, problems, warning events, Helm status. Includes recent changes correlated with detected problems. | `namespace` (optional) |
-| `list_resources` | List resources of a kind with minified summaries (pods, deployments, services, CRDs, etc.) | `kind` (required), `namespace` (optional) |
-| `get_resource` | Detailed view of a single resource — minified spec + status + metadata + default-on `resourceContext` (managedBy / exposes / selectedBy / uses / runsOn / issue+audit rollups). Optionally include heavier sidecars (events / metrics / logs). | `kind` (required), `namespace` (optional — omit for cluster-scoped kinds: Node, ClusterRole, IngressClass, etc.), `name` (required), `group` (optional, for ambiguous kinds), `include` (optional: `events,metrics,logs`), `context` (optional: `basic` default, `none` for bare minified output) |
-| `get_topology` | Topology graph showing resource relationships (nodes and edges). Use `summary` format for LLM-friendly text descriptions of resource chains. | `namespace` (optional), `view` (optional: `traffic` or `resources`), `format` (optional: `graph` or `summary`) |
-| `get_events` | Recent Kubernetes events, deduplicated and sorted by recency. Filter by resource kind/name to scope to a specific resource. | `namespace` (optional), `limit` (optional, default 20, max 100), `kind` (optional), `name` (optional) |
-| `get_pod_logs` | Filtered pod logs prioritizing errors/warnings, with secret redaction | `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 200) |
-| `list_namespaces` | List all namespaces with status | (none) |
+| `issues` | "What's broken right now?" — a ranked, curated stream of live operational failures: failing workloads/pods, dangling references, pod-startup blockers (unschedulable / admission-rejected / stuck post-bind), and False CRD conditions. No source filter; each row carries a `source` label sliceable via `filter`. For static posture use `get_cluster_audit`; for raw events use `get_events`. | `namespace` (optional), `severity` (optional: `critical,warning`), `kind` (optional), `filter` (optional CEL), `limit` (optional, default 200, max 1000) |
+| `diagnose` | Root-cause one workload (Pod/Deployment/StatefulSet/DaemonSet) in a single call: minified resource + `resourceContext` + current AND previous container logs across its pods + filtered events + a `startupBlockers` section when it can't reach Running. Replaces a `get_resource → events → logs → logs(previous)` chain. | `kind` (required), `namespace` (required), `name` (required) |
+| `get_dashboard` | Cluster/namespace health overview — resource counts, failing pods, unhealthy workloads, warning events, Helm status. Inventory-style triage before drilling in. | `namespace` (optional) |
+| `top_resources` | Live metrics ranked like `kubectl top \| sort`, joined with K8s context (status, restarts, owner, requests/limits). Use for CPU/memory/OOM/load symptoms. | `kind` (optional: `pods` default, `workloads`, `nodes`), `namespace` (optional), `sort` (optional: `cpu` default, `memory`), `limit` (optional, default 20, max 100) |
+| `list_resources` | List resources of a kind with minified summaries + per-row `summaryContext` (managedBy / health / issueCount). | `kind` (required), `group` (optional), `namespace` (optional), `context` (optional: default / `none`) |
+| `search` | Find resources by content/term match (config keys, env refs, images, label values, CRD fields, status messages). Tokens AND'd; secret values never indexed. Supports `kind:`/`ns:`/`label:`/`image:` modifiers and CEL `filter`. | `query` (required), `filter` (optional CEL), `limit` (optional) |
+| `get_resource` | Detailed view of a single resource — minified spec + status + metadata + default-on `resourceContext` (managedBy / exposes / selectedBy / uses / runsOn / issue+audit rollups). Optionally include heavier sidecars (events / metrics). For logs use `get_pod_logs` / `get_workload_logs` / `diagnose`. | `kind` (required), `namespace` (optional — omit for cluster-scoped kinds: Node, ClusterRole, IngressClass, etc.), `name` (required), `group` (optional, for ambiguous kinds), `include` (optional: `events,metrics`), `context` (optional: `basic` default, `none` for bare minified output) |
+| `get_topology` | Whole-namespace/cluster topology graph (nodes + edges). Use `summary` format for LLM-friendly text chains. Once you have a suspect root, prefer `get_neighborhood`. | `namespace` (optional), `view` (optional: `traffic` or `resources`), `format` (optional: `graph` or `summary`) |
+| `get_neighborhood` | BFS-expanded topology neighborhood around one known root — cheaper and clearer than `get_topology` for cross-resource failures (routing, selector/endpoint, refs, owner chains). RBAC-filtered. | `kind` (required), `namespace` (optional), `name` (required), `profile` (optional: `auto` default / `all`), `hops` (optional, default 1, max 2) |
+| `get_events` | Recent Kubernetes Warning events, deduplicated and sorted by recency. Filter by resource kind/name to scope. | `namespace` (optional), `limit` (optional, default 20, max 100), `kind` (optional), `name` (optional) |
 | `get_changes` | Recent resource changes (creates, updates, deletes) from the cluster timeline. Use to investigate what changed before an incident. | `namespace` (optional), `kind` (optional), `name` (optional), `since` (optional, e.g. `1h`, `30m`; default `1h`), `limit` (optional, default 20, max 50) |
+| `get_pod_logs` | Filtered pod logs prioritizing errors/warnings, with secret redaction. Set `grep` for server-side filtering. | `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 200), `grep` (optional) |
+| `get_workload_logs` | Aggregated, AI-filtered logs from all pods of a workload (Deployment, StatefulSet, DaemonSet) | `kind` (required), `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 100 per pod), `grep` (optional) |
+| `get_cluster_audit` | Static config posture — best-practice findings (Security / Reliability / Efficiency) with remediation. INDEPENDENT of operational health; for "what's broken right now?" use `issues`. | `namespace` (optional), `category` (optional), `severity` (optional) |
+| `list_packages` | Installed packages (Helm releases, label-managed workloads, CRDs, Argo Applications, Flux HelmReleases + Kustomizations) with source provenance, versions, and health, in one call. | `namespace` (optional), `source` (optional), `chart` (optional substring) |
 | `list_helm_releases` | List all Helm releases with status and health | `namespace` (optional) |
 | `get_helm_release` | Detailed Helm release info with optional values, history, and manifest diff | `namespace` (required), `name` (required), `include` (optional: `values,history,diff`), `diff_revision_1` (required when `include=diff`) / `diff_revision_2` (optional) |
-| `get_workload_logs` | Aggregated, AI-filtered logs from all pods of a workload (Deployment, StatefulSet, DaemonSet) | `kind` (required), `namespace` (required), `name` (required), `container` (optional), `tail_lines` (optional, default 100 per pod) |
+| `list_namespaces` | List all namespaces with status | (none) |
 | `get_subject_permissions` | Effective RBAC permissions of a ServiceAccount / User / Group: bindings (each with `inheritedFromGroup` set when applicable), deduplicated flat rule list, and (for SAs) the Pods running as it. Use to answer "is this SA over-privileged?" or "what's the blast radius if this Pod is compromised?" | `kind` (required: `ServiceAccount`, `User`, or `Group`), `namespace` (required for ServiceAccount; omit for User/Group), `name` (required) |
 
 ### Write Tools
@@ -198,7 +205,7 @@ Add to `~/.gemini/settings.json`:
 
 ## Security
 
-- **Safe by design** — read tools are strictly read-only; write tools perform non-destructive operations (restart, scale, sync) and are annotated with MCP tool hints so AI clients can distinguish them
+- **Safe by design** — read tools are strictly read-only and annotated with `readOnlyHint`; write tools (restart, scale, rollback, sync, apply, cordon/drain) are RBAC-enforced and annotated with `destructiveHint` so AI clients can prompt for confirmation. Some are genuinely destructive — `apply_resource` uses server-side apply with `Force=true` (can take field ownership from Helm/Flux), `manage_node drain` evicts pods, and `rollback`/`terminate` overwrite or abort desired state
 - **RBAC-aware** — every call enforces RBAC at the same boundary as the REST API:
   - **Local binary**: the cache uses your kubeconfig identity, so MCP can only see what `kubectl` can see for that user
   - **In-cluster (auth enabled)**: read tools intersect namespaced reads with the calling user's RBAC-allowed namespaces; cluster-scoped reads (Nodes, PVs, ClusterRoles, cluster-scoped CRDs) are gated per-kind via SubjectAccessReview, so cluster-wide pod visibility doesn't implicitly grant Node read; write tools, exec, and logs are fully impersonated so the apiserver enforces the user's RBAC end-to-end

@@ -2,46 +2,33 @@ package issues
 
 import (
 	"testing"
-	"time"
 
-	corev1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
 
 	"github.com/skyhook-io/radar/internal/filter"
 	"github.com/skyhook-io/radar/internal/k8s"
 )
 
 // Filter integration tests — exercise ComposeWithStats with a compiled
-// CEL filter, covering match/drop, eval-error stats, and the source-
+// CEL filter, covering match/drop, eval-error stats, and the
 // post-filter ordering invariant that limit applies last.
 
-func TestCompose_WithCELFilter_FiltersByCount(t *testing.T) {
-	// Mix of low-count problems and high-count events; `count > 5`
-	// should keep only the events.
-	now := time.Now()
+func TestCompose_WithCELFilter_MatchesAndDrops(t *testing.T) {
+	// Two problem rows; a reason predicate should keep only the match.
 	p := &fakeProvider{
 		problems: []k8s.Problem{
-			{Kind: "Pod", Name: "p1", Severity: "critical", Reason: "x"},
-		},
-		events: []*corev1.Event{
-			{
-				ObjectMeta:     metav1.ObjectMeta{Namespace: "ns", Name: "evt-1"},
-				InvolvedObject: corev1.ObjectReference{Kind: "Pod", Name: "evt-pod"},
-				Reason:         "BackOff",
-				Type:           corev1.EventTypeWarning,
-				FirstTimestamp: metav1.Time{Time: now.Add(-2 * time.Minute)},
-				LastTimestamp:  metav1.Time{Time: now.Add(-1 * time.Minute)},
-				Count:          10,
-			},
+			{Kind: "Pod", Namespace: "ns", Name: "crash", Severity: "critical", Reason: "CrashLoopBackOff"},
+			{Kind: "Pod", Namespace: "ns", Name: "oom", Severity: "critical", Reason: "OOMKilled"},
 		},
 	}
-	f, err := filter.CompileIssueFilter(`count > 5`)
+	f, err := filter.CompileIssueFilter(`reason == "OOMKilled"`)
 	if err != nil {
 		t.Fatal(err)
 	}
-	out, stats := ComposeWithStats(p, Filters{Filter: f, IncludeEvents: true})
-	if len(out) != 1 || out[0].Name != "evt-pod" {
-		t.Fatalf("expected single event-source hit, got %+v", out)
+	out, stats := ComposeWithStats(p, Filters{Filter: f})
+	if len(out) != 1 || out[0].Name != "oom" {
+		t.Fatalf("expected single OOMKilled hit, got %+v", out)
 	}
 	if stats.FilterErrors != 0 {
 		t.Errorf("clean filter, expected no eval errors, got %d", stats.FilterErrors)
@@ -72,6 +59,35 @@ func TestCompose_FilterAppliedBeforeLimit(t *testing.T) {
 	}
 }
 
+func TestCompose_WithCELFilter_SourceBinding(t *testing.T) {
+	// The `source=` query param was removed; the CEL `source` binding is now
+	// the ONLY way to slice issues by detector (documented migration path in
+	// the HTTP handler + MCP tool schema). Guard that the binding exists and
+	// slices correctly across two distinct sources.
+	gvr := schema.GroupVersionResource{Group: "argoproj.io", Version: "v1alpha1", Resource: "applications"}
+	app := &unstructured.Unstructured{Object: map[string]any{
+		"apiVersion": "argoproj.io/v1alpha1",
+		"kind":       "Application",
+		"metadata":   map[string]any{"name": "my-app", "namespace": "argocd"},
+		"status": map[string]any{"conditions": []any{
+			map[string]any{"type": "Synced", "status": "False", "reason": "OutOfSync", "message": "drift"}, // the one False condition; the assertion below expects it to yield the single condition-source row
+		}},
+	}}
+	p := &fakeProvider{
+		problems: []k8s.Problem{{Kind: "Deployment", Namespace: "argocd", Name: "api", Severity: "critical", Reason: "down"}}, // row fed in via problems; the filter is expected to drop it
+		dynamic:  map[schema.GroupVersionResource][]*unstructured.Unstructured{gvr: {app}},
+		kinds:    map[schema.GroupVersionResource]string{gvr: "Application"},
+	}
+	f, err := filter.CompileIssueFilter(`source == "condition"`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	out, _ := ComposeWithStats(p, Filters{Filter: f}) // stats deliberately discarded; only the surviving rows are checked here
+	if len(out) != 1 || out[0].Source != SourceCondition {
+		t.Fatalf("source==\"condition\" should keep only the condition row, got %+v", out)
+	}
+}
+
 func TestCompose_FilterEvalError_StatsPopulated(t *testing.T) {
 	// Reference an unbound-but-syntactically-valid path that won't
 	// resolve on any actual issue row — the dyn-typed env declares