diff --git a/cmd/thv-operator/controllers/mcpregistry_controller.go b/cmd/thv-operator/controllers/mcpregistry_controller.go index e6d05f4d0e..f15113eb85 100644 --- a/cmd/thv-operator/controllers/mcpregistry_controller.go +++ b/cmd/thv-operator/controllers/mcpregistry_controller.go @@ -47,8 +47,8 @@ type MCPRegistryReconciler struct { } // NewMCPRegistryReconciler creates a new MCPRegistryReconciler with required dependencies -func NewMCPRegistryReconciler(k8sClient client.Client, scheme *runtime.Scheme) *MCPRegistryReconciler { - registryAPIManager := registryapi.NewManager(k8sClient, scheme) +func NewMCPRegistryReconciler(k8sClient client.Client, scheme *runtime.Scheme, disableWorkloadRBAC bool) *MCPRegistryReconciler { + registryAPIManager := registryapi.NewManager(k8sClient, scheme, disableWorkloadRBAC) return &MCPRegistryReconciler{ Client: k8sClient, Scheme: scheme, @@ -67,10 +67,6 @@ func NewMCPRegistryReconciler(k8sClient client.Client, scheme *runtime.Scheme) * // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete // -// For creating registry-api RBAC resources -// +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete -// // For granting registry-api permissions (operator must have these to grant them via Role) // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpservers;mcpremoteproxies;virtualmcpservers,verbs=get;list;watch // +kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=httproutes;gateways,verbs=get;list;watch diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_controller.go b/cmd/thv-operator/controllers/mcpremoteproxy_controller.go index cc4e1fb9bf..2ac6fa358d 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_controller.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_controller.go @@ -37,9 +37,10 @@ import ( // MCPRemoteProxyReconciler reconciles a MCPRemoteProxy object type MCPRemoteProxyReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - PlatformDetector *ctrlutil.SharedPlatformDetector + Scheme *runtime.Scheme + Recorder record.EventRecorder + PlatformDetector *ctrlutil.SharedPlatformDetector + DisableWorkloadRBAC bool } // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpremoteproxies,verbs=get;list;watch;create;update;patch;delete @@ -48,12 +49,9 @@ type MCPRemoteProxyReconciler struct { // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpexternalauthconfigs,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=services,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=rolebindings,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=create;delete;get;list;patch;update;watch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -584,6 +582,10 @@ func (r *MCPRemoteProxyReconciler) validateGroupRef(ctx context.Context, proxy * // Uses the RBAC client (pkg/kubernetes/rbac) which creates or updates RBAC resources // automatically during operator upgrades. func (r *MCPRemoteProxyReconciler) ensureRBACResources(ctx context.Context, proxy *mcpv1alpha1.MCPRemoteProxy) error { + if r.DisableWorkloadRBAC { + return nil + } + // If a service account is specified, we don't need to create one if proxy.Spec.ServiceAccount != nil { return nil diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_controller_test.go b/cmd/thv-operator/controllers/mcpremoteproxy_controller_test.go index e58c331896..e98de489b3 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_controller_test.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_controller_test.go @@ -1099,6 +1099,50 @@ func TestMCPRemoteProxyEnsureRBACResources_CustomServiceAccount(t *testing.T) { assert.Error(t, err, "RoleBinding should not be created when custom ServiceAccount is provided") } +// TestEnsureRBACResources_DisableWorkloadRBAC_MCPRemoteProxy tests that RBAC creation is skipped +// when DisableWorkloadRBAC is true. +func TestEnsureRBACResources_DisableWorkloadRBAC_MCPRemoteProxy(t *testing.T) { + t.Parallel() + + proxy := &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-proxy-disable-rbac", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + RemoteURL: "https://mcp.example.com", + Port: 8080, + }, + } + + testScheme := createRunConfigTestScheme() + _ = rbacv1.AddToScheme(testScheme) + + fakeClient := fake.NewClientBuilder(). + WithScheme(testScheme). + WithRuntimeObjects(proxy). + Build() + + reconciler := &MCPRemoteProxyReconciler{ + Client: fakeClient, + Scheme: testScheme, + DisableWorkloadRBAC: true, + } + + err := reconciler.ensureRBACResources(t.Context(), proxy) + require.NoError(t, err) + + // Verify NO RBAC resources were created + generatedSAName := proxyRunnerServiceAccountNameForRemoteProxy(proxy.Name) + + sa := &corev1.ServiceAccount{} + err = fakeClient.Get(t.Context(), types.NamespacedName{ + Name: generatedSAName, + Namespace: proxy.Namespace, + }, sa) + assert.Error(t, err, "ServiceAccount should not be created when workload RBAC is disabled") +} + // TestUpdateMCPRemoteProxyStatus tests status update logic func TestUpdateMCPRemoteProxyStatus(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/controllers/mcpserver_controller.go b/cmd/thv-operator/controllers/mcpserver_controller.go index 86b741eac4..50bd4f28ee 100644 --- a/cmd/thv-operator/controllers/mcpserver_controller.go +++ b/cmd/thv-operator/controllers/mcpserver_controller.go @@ -46,10 +46,11 @@ import ( // MCPServerReconciler reconciles a MCPServer object type MCPServerReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - PlatformDetector *ctrlutil.SharedPlatformDetector - ImageValidation validation.ImageValidation + Scheme *runtime.Scheme + Recorder record.EventRecorder + PlatformDetector *ctrlutil.SharedPlatformDetector + ImageValidation validation.ImageValidation + DisableWorkloadRBAC bool } // defaultRBACRules are the default RBAC rules that the @@ -141,13 +142,10 @@ func (r *MCPServerReconciler) detectPlatform(ctx context.Context) (kubernetes.Pl // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcptoolconfigs,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=services,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=rolebindings,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=pods/attach,verbs=create;get // +kubebuilder:rbac:groups="",resources=pods/log,verbs=get @@ -884,7 +882,12 @@ func (r *MCPServerReconciler) handleToolConfig(ctx context.Context, m *mcpv1alph return nil } + func (r *MCPServerReconciler) ensureRBACResources(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer) error { + if r.DisableWorkloadRBAC { + return nil + } + rbacClient := rbac.NewClient(r.Client, r.Scheme) proxyRunnerNameForRBAC := ctrlutil.ProxyRunnerServiceAccountName(mcpServer.Name) diff --git a/cmd/thv-operator/controllers/mcpserver_rbac_test.go b/cmd/thv-operator/controllers/mcpserver_rbac_test.go index a19f830758..6d855e107f 100644 --- a/cmd/thv-operator/controllers/mcpserver_rbac_test.go +++ b/cmd/thv-operator/controllers/mcpserver_rbac_test.go @@ -424,6 +424,23 @@ func TestEnsureRBACResources_ImagePullSecrets(t *testing.T) { assert.Equal(t, expectedSecrets, mcpServerSA.ImagePullSecrets) } +func TestEnsureRBACResources_DisableWorkloadRBAC(t *testing.T) { + t.Parallel() + tc := setupTest("test-server-disable-rbac", "default") + tc.reconciler.DisableWorkloadRBAC = true + + err := tc.ensureRBACResources() + require.NoError(t, err) + + // Verify NO RBAC resources were created + sa := &corev1.ServiceAccount{} + err = tc.client.Get(t.Context(), types.NamespacedName{ + Name: tc.proxyRunnerNameForRBAC, + Namespace: tc.mcpServer.Namespace, + }, sa) + assert.Error(t, err, "ServiceAccount should not be created when workload RBAC is disabled") +} + func createTestMCPServer(name, namespace string) *mcpv1alpha1.MCPServer { return &mcpv1alpha1.MCPServer{ ObjectMeta: metav1.ObjectMeta{ diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller.go b/cmd/thv-operator/controllers/virtualmcpserver_controller.go index e3a4b25e5a..c8a8caa0df 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller.go @@ -83,9 +83,10 @@ type AuthConfigError struct { // may not have owner references (StatefulSet, headless Service, RunConfig ConfigMap). type VirtualMCPServerReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - PlatformDetector *ctrlutil.SharedPlatformDetector + Scheme *runtime.Scheme + Recorder record.EventRecorder + PlatformDetector *ctrlutil.SharedPlatformDetector + DisableWorkloadRBAC bool } // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=virtualmcpservers,verbs=get;list;watch;create;update;patch;delete @@ -97,12 +98,9 @@ type VirtualMCPServerReconciler struct { // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=virtualmcpcompositetooldefinitions,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=services,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=rolebindings,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=create;delete;get;list;patch;update;watch -// +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers,verbs=get;list;watch // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers/status,verbs=get @@ -630,6 +628,10 @@ func (r *VirtualMCPServerReconciler) ensureRBACResources( ctx context.Context, vmcp *mcpv1alpha1.VirtualMCPServer, ) error { + if r.DisableWorkloadRBAC { + return nil + } + // If a service account is specified, we don't need to create one if vmcp.Spec.ServiceAccount != nil { return nil diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go b/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go index b43ee5a9d2..aa75d6cbd1 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go @@ -630,6 +630,52 @@ func TestVirtualMCPServerEnsureRBACResources_CustomServiceAccount(t *testing.T) assert.Error(t, err, "RoleBinding should not be created when custom ServiceAccount is provided") } +// TestVirtualMCPServerEnsureRBACResources_DisableWorkloadRBAC tests that RBAC creation is skipped +// when DisableWorkloadRBAC is true. +func TestVirtualMCPServerEnsureRBACResources_DisableWorkloadRBAC(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp-disable-rbac", + Namespace: "default", + UID: "test-uid", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + Config: vmcpconfig.Config{Group: testGroupName}, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + _ = rbacv1.AddToScheme(scheme) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(vmcp). + Build() + + r := &VirtualMCPServerReconciler{ + Client: fakeClient, + Scheme: scheme, + DisableWorkloadRBAC: true, + } + + err := r.ensureRBACResources(t.Context(), vmcp) + require.NoError(t, err) + + // Verify NO RBAC resources were created + generatedSAName := vmcpServiceAccountName(vmcp.Name) + + sa := &corev1.ServiceAccount{} + err = fakeClient.Get(t.Context(), types.NamespacedName{ + Name: generatedSAName, + Namespace: vmcp.Namespace, + }, sa) + assert.Error(t, err, "ServiceAccount should not be created when workload RBAC is disabled") +} + // TestVirtualMCPServerEnsureDeployment tests Deployment creation func TestVirtualMCPServerEnsureDeployment(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/main.go b/cmd/thv-operator/main.go index 099b897bbc..1e58c0716f 100644 --- a/cmd/thv-operator/main.go +++ b/cmd/thv-operator/main.go @@ -46,6 +46,10 @@ const ( featureServer = "ENABLE_SERVER" featureRegistry = "ENABLE_REGISTRY" featureVMCP = "ENABLE_VMCP" + // disableWorkloadRBAC disables per-workload RBAC management (ServiceAccount, Role, RoleBinding). + // When enabled, the operator will not create RBAC resources for workloads, + // allowing them to be managed externally (e.g., via per-workload Helm charts). + disableWorkloadRBAC = "DISABLE_WORKLOAD_RBAC" ) // controllerDependencies maps each controller group to its required dependencies @@ -131,6 +135,11 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { enableServer := isFeatureEnabled(featureServer, true) enableRegistry := isFeatureEnabled(featureRegistry, true) enableVMCP := isFeatureEnabled(featureVMCP, true) + workloadRBACDisabled := isFeatureEnabled(disableWorkloadRBAC, false) + + if workloadRBACDisabled { + setupLog.Info("DISABLE_WORKLOAD_RBAC is enabled, operator will not create per-workload RBAC resources") + } // Track enabled features for dependency checking enabledFeatures := map[string]bool{ @@ -159,7 +168,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { // Set up server-related controllers if enabledFeatures[featureServer] { - if err := setupServerControllers(mgr, enableRegistry); err != nil { + if err := setupServerControllers(mgr, enableRegistry, workloadRBACDisabled); err != nil { return err } } else { @@ -168,7 +177,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { // Set up registry controller if enabledFeatures[featureRegistry] { - if err := setupRegistryController(mgr); err != nil { + if err := setupRegistryController(mgr, workloadRBACDisabled); err != nil { return err } } else { @@ -177,7 +186,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { // Set up Virtual MCP controllers and webhooks if enabledFeatures[featureVMCP] { - if err := setupAggregationControllers(mgr); err != nil { + if err := setupAggregationControllers(mgr, workloadRBACDisabled); err != nil { return err } } else { @@ -189,7 +198,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { } // setupServerControllers sets up server-related controllers (MCPServer, MCPExternalAuthConfig, MCPRemoteProxy, ToolConfig) -func setupServerControllers(mgr ctrl.Manager, enableRegistry bool) error { +func setupServerControllers(mgr ctrl.Manager, enableRegistry, disableWorkloadRBAC bool) error { // Set up field indexing for MCPServer.Spec.GroupRef if err := mgr.GetFieldIndexer().IndexField( context.Background(), @@ -232,11 +241,12 @@ func setupServerControllers(mgr ctrl.Manager, enableRegistry bool) error { // Set up MCPServer controller rec := &controllers.MCPServerReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("mcpserver-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), - ImageValidation: imageValidation, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("mcpserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImageValidation: imageValidation, + DisableWorkloadRBAC: disableWorkloadRBAC, } if err := rec.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller MCPServer: %w", err) @@ -260,10 +270,11 @@ func setupServerControllers(mgr ctrl.Manager, enableRegistry bool) error { // Set up MCPRemoteProxy controller if err := (&controllers.MCPRemoteProxyReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("mcpremoteproxy-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("mcpremoteproxy-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + DisableWorkloadRBAC: disableWorkloadRBAC, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller MCPRemoteProxy: %w", err) } @@ -283,8 +294,11 @@ func setupServerControllers(mgr ctrl.Manager, enableRegistry bool) error { } // setupRegistryController sets up the MCPRegistry controller -func setupRegistryController(mgr ctrl.Manager) error { - if err := (controllers.NewMCPRegistryReconciler(mgr.GetClient(), mgr.GetScheme())).SetupWithManager(mgr); err != nil { +func setupRegistryController(mgr ctrl.Manager, disableWorkloadRBAC bool) error { + reconciler := controllers.NewMCPRegistryReconciler( + mgr.GetClient(), mgr.GetScheme(), disableWorkloadRBAC, + ) + if err := reconciler.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller MCPRegistry: %w", err) } return nil @@ -294,7 +308,7 @@ func setupRegistryController(mgr ctrl.Manager) error { // (MCPGroup, VirtualMCPServer, and their webhooks) // Note: This function assumes server controllers are enabled (enforced by dependency check) // The field index for MCPServer.Spec.GroupRef is created in setupServerControllers -func setupAggregationControllers(mgr ctrl.Manager) error { +func setupAggregationControllers(mgr ctrl.Manager, disableWorkloadRBAC bool) error { // Set up MCPGroup controller if err := (&controllers.MCPGroupReconciler{ Client: mgr.GetClient(), @@ -304,10 +318,11 @@ func setupAggregationControllers(mgr ctrl.Manager) error { // Set up VirtualMCPServer controller if err := (&controllers.VirtualMCPServerReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("virtualmcpserver-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("virtualmcpserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + DisableWorkloadRBAC: disableWorkloadRBAC, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller VirtualMCPServer: %w", err) } diff --git a/cmd/thv-operator/pkg/registryapi/manager.go b/cmd/thv-operator/pkg/registryapi/manager.go index 35d1b753dd..9304444fcb 100644 --- a/cmd/thv-operator/pkg/registryapi/manager.go +++ b/cmd/thv-operator/pkg/registryapi/manager.go @@ -21,20 +21,23 @@ import ( // manager implements the Manager interface type manager struct { - client client.Client - scheme *runtime.Scheme - kubeHelper *kubernetes.Client + client client.Client + scheme *runtime.Scheme + kubeHelper *kubernetes.Client + disableWorkloadRBAC bool } // NewManager creates a new registry API manager func NewManager( k8sClient client.Client, scheme *runtime.Scheme, + disableWorkloadRBAC bool, ) Manager { return &manager{ - client: k8sClient, - scheme: scheme, - kubeHelper: kubernetes.NewClient(k8sClient, scheme), + client: k8sClient, + scheme: scheme, + kubeHelper: kubernetes.NewClient(k8sClient, scheme), + disableWorkloadRBAC: disableWorkloadRBAC, } } diff --git a/cmd/thv-operator/pkg/registryapi/manager_test.go b/cmd/thv-operator/pkg/registryapi/manager_test.go index 5e699ea97b..e0c8ec9d83 100644 --- a/cmd/thv-operator/pkg/registryapi/manager_test.go +++ b/cmd/thv-operator/pkg/registryapi/manager_test.go @@ -46,7 +46,7 @@ func TestNewManager(t *testing.T) { scheme := runtime.NewScheme() // Create manager - manager := NewManager(nil, scheme) + manager := NewManager(nil, scheme, false) // Verify manager is created assert.NotNil(t, manager) @@ -102,7 +102,7 @@ func TestReconcileAPIService(t *testing.T) { } // Create manager - manager := NewManager(fakeClient, scheme) + manager := NewManager(fakeClient, scheme, false) // Execute result := manager.ReconcileAPIService(context.Background(), mcpRegistry) @@ -187,7 +187,7 @@ func TestReconcileAPIService(t *testing.T) { } // Create manager - manager := NewManager(fakeClient, scheme) + manager := NewManager(fakeClient, scheme, false) // Execute result := manager.ReconcileAPIService(context.Background(), mcpRegistry) diff --git a/cmd/thv-operator/pkg/registryapi/rbac.go b/cmd/thv-operator/pkg/registryapi/rbac.go index 31b0dd6b0a..6827d9b752 100644 --- a/cmd/thv-operator/pkg/registryapi/rbac.go +++ b/cmd/thv-operator/pkg/registryapi/rbac.go @@ -70,6 +70,10 @@ func (m *manager) ensureRBACResources( ctx context.Context, mcpRegistry *mcpv1alpha1.MCPRegistry, ) error { + if m.disableWorkloadRBAC { + return nil + } + ctxLogger := log.FromContext(ctx).WithValues("mcpregistry", mcpRegistry.Name) ctxLogger.Info("Ensuring RBAC resources for registry API") diff --git a/cmd/thv-operator/pkg/registryapi/rbac_test.go b/cmd/thv-operator/pkg/registryapi/rbac_test.go index 7f6154709f..0358ef726a 100644 --- a/cmd/thv-operator/pkg/registryapi/rbac_test.go +++ b/cmd/thv-operator/pkg/registryapi/rbac_test.go @@ -201,6 +201,33 @@ func TestEnsureRBACResources(t *testing.T) { } } +func TestReconcileAPIService_DisableWorkloadRBAC(t *testing.T) { + t.Parallel() + + mcpRegistry := createTestMCPRegistry() + c := fake.NewClientBuilder().WithScheme(createTestScheme()).Build() + m := &manager{ + client: c, + scheme: createTestScheme(), + disableWorkloadRBAC: true, + } + + // ensureRBACResources is called within ReconcileAPIService, but we test + // the guard directly since ReconcileAPIService has other dependencies. + // Verify that ensureRBACResources returns nil immediately. + err := m.ensureRBACResources(t.Context(), mcpRegistry) + require.NoError(t, err) + + // Verify NO RBAC resources were created + resourceName := GetServiceAccountName(mcpRegistry) + sa := &corev1.ServiceAccount{} + err = c.Get(t.Context(), types.NamespacedName{ + Name: resourceName, + Namespace: mcpRegistry.Namespace, + }, sa) + assert.Error(t, err, "ServiceAccount should not be created when workload RBAC is disabled") +} + func TestRegistryAPIRBACRules(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/test-integration/mcp-registry/suite_test.go b/cmd/thv-operator/test-integration/mcp-registry/suite_test.go index f158191dbf..44d7f6d32c 100644 --- a/cmd/thv-operator/test-integration/mcp-registry/suite_test.go +++ b/cmd/thv-operator/test-integration/mcp-registry/suite_test.go @@ -114,7 +114,7 @@ var _ = BeforeSuite(func() { // Set up MCPRegistry controller By("setting up MCPRegistry controller") - err = controllers.NewMCPRegistryReconciler(testMgr.GetClient(), testMgr.GetScheme()).SetupWithManager(testMgr) + err = controllers.NewMCPRegistryReconciler(testMgr.GetClient(), testMgr.GetScheme(), false).SetupWithManager(testMgr) Expect(err).NotTo(HaveOccurred()) // Start the manager in the background diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md index 632d78da56..dd30fc06ea 100644 --- a/deploy/charts/operator/README.md +++ b/deploy/charts/operator/README.md @@ -52,7 +52,7 @@ The command removes all the Kubernetes components associated with the chart and |-----|------|---------|-------------| | fullnameOverride | string | `"toolhive-operator"` | Provide a fully-qualified name override for resources | | nameOverride | string | `""` | Override the name of the chart | -| operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"env":{},"features":{"experimental":false,"registry":true,"server":true,"virtualMCP":true},"gc":{"gogc":75,"gomeglimit":"150MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.11.1","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":[""],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.11.1","vmcpImage":"ghcr.io/stacklok/toolhive/vmcp:v0.11.1","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources | +| operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"env":{},"features":{"experimental":false,"registry":true,"server":true,"virtualMCP":true},"gc":{"gogc":75,"gomeglimit":"150MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.11.1","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":[""],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"disableWorkloadRBAC":false,"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.11.1","vmcpImage":"ghcr.io/stacklok/toolhive/vmcp:v0.11.1","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources | | operator.affinity | object | `{}` | Affinity settings for the operator pod | | operator.autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}` | Configuration for horizontal pod autoscaling | | operator.autoscaling.enabled | bool | `false` | Enable autoscaling for the operator | @@ -82,8 +82,9 @@ The command removes all the Kubernetes components associated with the chart and | operator.podSecurityContext | object | `{"runAsNonRoot":true}` | Pod security context settings | | operator.ports | list | `[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}]` | List of ports to expose from the operator container | | operator.proxyHost | string | `"0.0.0.0"` | Host for the proxy deployed by the operator | -| operator.rbac | object | `{"allowedNamespaces":[],"scope":"cluster"}` | RBAC configuration for the operator | +| operator.rbac | object | `{"allowedNamespaces":[],"disableWorkloadRBAC":false,"scope":"cluster"}` | RBAC configuration for the operator | | operator.rbac.allowedNamespaces | list | `[]` | List of namespaces that the operator is allowed to have permissions to manage. Only used if scope is set to "namespace". | +| operator.rbac.disableWorkloadRBAC | bool | `false` | Disable per-workload RBAC management (ServiceAccount, Role, RoleBinding). When true, the operator will not create RBAC resources for workloads. Use this when RBAC is managed externally (e.g., via per-workload Helm charts). | | operator.rbac.scope | string | `"cluster"` | Scope of the RBAC configuration. - cluster: The operator will have cluster-wide permissions via ClusterRole and ClusterRoleBinding. - namespace: The operator will have permissions to manage resources in the namespaces specified in `allowedNamespaces`. The operator will have a ClusterRole and RoleBinding for each namespace in `allowedNamespaces`. | | operator.readinessProbe | object | `{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10}` | Readiness probe configuration for the operator | | operator.replicaCount | int | `1` | Number of replicas for the operator deployment | diff --git a/deploy/charts/operator/ci/externalRBAC-values.yaml b/deploy/charts/operator/ci/externalRBAC-values.yaml new file mode 100644 index 0000000000..0882f3565e --- /dev/null +++ b/deploy/charts/operator/ci/externalRBAC-values.yaml @@ -0,0 +1,3 @@ +operator: + rbac: + disableWorkloadRBAC: true diff --git a/deploy/charts/operator/templates/clusterrole/role.yaml b/deploy/charts/operator/templates/clusterrole/role.yaml index bde6b03ee3..6b3c0cf8bd 100644 --- a/deploy/charts/operator/templates/clusterrole/role.yaml +++ b/deploy/charts/operator/templates/clusterrole/role.yaml @@ -10,7 +10,6 @@ rules: - configmaps - persistentvolumeclaims - secrets - - serviceaccounts - services verbs: - create @@ -82,19 +81,6 @@ rules: - get - list - watch -- apiGroups: - - rbac.authorization.k8s.io - resources: - - rolebindings - - roles - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - apiGroups: - toolhive.stacklok.dev resources: diff --git a/deploy/charts/operator/templates/clusterrole/workload-rbac-role.yaml b/deploy/charts/operator/templates/clusterrole/workload-rbac-role.yaml new file mode 100644 index 0000000000..db35b308cb --- /dev/null +++ b/deploy/charts/operator/templates/clusterrole/workload-rbac-role.yaml @@ -0,0 +1,35 @@ +{{- if not .Values.operator.rbac.disableWorkloadRBAC }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: toolhive-operator-workload-rbac-role + labels: + {{- include "toolhive.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - rbac.authorization.k8s.io + resources: + - roles + - rolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +{{- end }} diff --git a/deploy/charts/operator/templates/clusterrole/workload-rbac-rolebinding.yaml b/deploy/charts/operator/templates/clusterrole/workload-rbac-rolebinding.yaml new file mode 100644 index 0000000000..c771516dd8 --- /dev/null +++ b/deploy/charts/operator/templates/clusterrole/workload-rbac-rolebinding.yaml @@ -0,0 +1,42 @@ +{{- if not .Values.operator.rbac.disableWorkloadRBAC }} + +{{- if eq .Values.operator.rbac.scope "cluster" }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: toolhive-operator-workload-rbac-rolebinding + labels: + {{- include "toolhive.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: toolhive-operator-workload-rbac-role +subjects: +- kind: ServiceAccount + name: toolhive-operator + namespace: {{ .Release.Namespace }} +{{- end }} + +{{- if eq .Values.operator.rbac.scope "namespace" }} +{{- range .Values.operator.rbac.allowedNamespaces }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: toolhive-operator-workload-rbac-rolebinding + namespace: {{ . }} + labels: + {{- include "toolhive.labels" $ | nindent 4 }} +subjects: +- kind: ServiceAccount + name: toolhive-operator + namespace: {{ $.Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: toolhive-operator-workload-rbac-role +{{- end }} +{{- end }} + +{{- end }} diff --git a/deploy/charts/operator/templates/deployment.yaml b/deploy/charts/operator/templates/deployment.yaml index d9953de19f..aec45d2f5c 100644 --- a/deploy/charts/operator/templates/deployment.yaml +++ b/deploy/charts/operator/templates/deployment.yaml @@ -64,6 +64,8 @@ spec: value: {{ .Values.operator.features.registry | quote }} - name: ENABLE_VMCP value: {{ .Values.operator.features.virtualMCP | quote }} + - name: DISABLE_WORKLOAD_RBAC + value: {{ .Values.operator.rbac.disableWorkloadRBAC | quote }} {{- if eq .Values.operator.rbac.scope "namespace" }} - name: WATCH_NAMESPACE value: "{{ .Values.operator.rbac.allowedNamespaces | join "," }}" diff --git a/deploy/charts/operator/templates/registry-api-clusterrole.yaml b/deploy/charts/operator/templates/registry-api-clusterrole.yaml index 34cebb100a..10037b6cfc 100644 --- a/deploy/charts/operator/templates/registry-api-clusterrole.yaml +++ b/deploy/charts/operator/templates/registry-api-clusterrole.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.operator.rbac.disableWorkloadRBAC }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -15,3 +16,4 @@ rules: - get - list - watch +{{- end }} diff --git a/deploy/charts/operator/templates/registry-api-clusterrolebinding.yaml b/deploy/charts/operator/templates/registry-api-clusterrolebinding.yaml index 039c3daae8..217a7f4bf2 100644 --- a/deploy/charts/operator/templates/registry-api-clusterrolebinding.yaml +++ b/deploy/charts/operator/templates/registry-api-clusterrolebinding.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.operator.rbac.disableWorkloadRBAC }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -14,3 +15,4 @@ subjects: - kind: ServiceAccount name: {{ .Values.registryAPI.serviceAccount.name }} namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/deploy/charts/operator/values-openshift.yaml b/deploy/charts/operator/values-openshift.yaml index b291051824..48b9960716 100644 --- a/deploy/charts/operator/values-openshift.yaml +++ b/deploy/charts/operator/values-openshift.yaml @@ -126,6 +126,10 @@ operator: # -- List of namespaces that the operator is allowed to have permissions to manage. # Only used if scope is set to "namespace". allowedNamespaces: [] + # -- Disable per-workload RBAC management (ServiceAccount, Role, RoleBinding). + # When true, the operator will not create RBAC resources for workloads. + # Use this when RBAC is managed externally (e.g., via per-workload Helm charts). + disableWorkloadRBAC: false # -- Service account configuration for the operator serviceAccount: diff --git a/deploy/charts/operator/values.yaml b/deploy/charts/operator/values.yaml index a8aca199d2..fc0c405d3f 100644 --- a/deploy/charts/operator/values.yaml +++ b/deploy/charts/operator/values.yaml @@ -126,6 +126,10 @@ operator: # -- List of namespaces that the operator is allowed to have permissions to manage. # Only used if scope is set to "namespace". allowedNamespaces: [] + # -- Disable per-workload RBAC management (ServiceAccount, Role, RoleBinding). + # When true, the operator will not create RBAC resources for workloads. + # Use this when RBAC is managed externally (e.g., via per-workload Helm charts). + disableWorkloadRBAC: false # -- Service account configuration for the operator serviceAccount: