From b58a4e4c26c603e277d9c6bf2bb4d10490066e9d Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:18:20 +0100 Subject: [PATCH 1/7] Restructure MCPTelemetryConfig CRD to nested spec per RFC THV-0023 The MCPTelemetryConfig spec was using a flat inline embedding of telemetry.Config which exposed CLI-only fields and didn't match the nested structure specified in RFC THV-0023. Restructure to use openTelemetry/prometheus sub-objects: - spec.openTelemetry: endpoint, headers, sensitiveHeaders, resourceAttributes, tracing, metrics, useLegacyAttributes - spec.prometheus: enabled - status.referencingWorkloads: structured WorkloadReference (kind/namespace/name) replacing bare string list Update the conversion layer (spectoconfig, controllerutil) to map the nested CRD types to the flat telemetry.Config at runtime. Add operator example YAML files for MCPTelemetryConfig and MCPServer with telemetryConfigRef. Closes #4262 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../api/v1alpha1/mcptelemetryconfig_types.go | 116 ++++++-- .../api/v1alpha1/zz_generated.deepcopy.go | 64 ++++- .../mcptelemetryconfig_controller.go | 64 +++-- .../mcptelemetryconfig_controller_test.go | 61 +++-- .../pkg/controllerutil/telemetry.go | 24 +- .../pkg/controllerutil/telemetry_test.go | 26 +- .../pkg/runconfig/telemetry_test.go | 18 +- .../pkg/spectoconfig/telemetry.go | 31 ++- .../pkg/spectoconfig/telemetry_test.go | 52 ++-- .../mcpserver_runconfig_integration_test.go | 21 +- ...metryconfig_controller_integration_test.go | 55 ++-- ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 257 +++++++++--------- ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 257 +++++++++--------- .../mcp-servers/mcpserver_fetch_otel.yaml | 6 + .../basic-telemetry-config.yaml | 24 ++ .../mcpserver-with-telemetry-config-ref.yaml | 29 ++ .../production-telemetry-config.yaml | 36 +++ 17 files changed, 716 insertions(+), 425 deletions(-) create mode 100644 examples/operator/telemetry-configs/basic-telemetry-config.yaml create mode 100644 examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml create mode 100644 examples/operator/telemetry-configs/production-telemetry-config.yaml diff --git a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go index eab2093ba3..cbbad5e2e3 100644 --- a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go @@ -7,8 +7,6 @@ import ( "fmt" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/stacklok/toolhive/pkg/telemetry" ) // SensitiveHeader represents a header whose value is stored in a Kubernetes Secret. @@ -25,24 +23,88 @@ type SensitiveHeader struct { SecretKeyRef SecretKeyRef `json:"secretKeyRef"` } -// MCPTelemetryConfigSpec defines the desired state of MCPTelemetryConfig. -// It embeds telemetry.Config from pkg/telemetry to eliminate the conversion -// layer between CRD and application types. The environmentVariables field is -// CLI-only and rejected by CEL validation; customAttributes is allowed for -// setting shared OTel resource attributes (e.g., deployment.environment). +// MCPTelemetryOTelConfig defines OpenTelemetry configuration for shared MCPTelemetryConfig resources. +// Unlike OpenTelemetryConfig (used by inline MCPServer telemetry), this type: +// - Omits ServiceName (per-server field set via MCPTelemetryConfigReference) +// - Uses map[string]string for Headers (not []string) +// - Adds SensitiveHeaders for Kubernetes Secret-backed credentials +// - Adds ResourceAttributes for shared OTel resource attributes // -// +kubebuilder:validation:XValidation:rule="!has(self.environmentVariables)",message="environmentVariables is a CLI-only field and cannot be set in MCPTelemetryConfig; use customAttributes for resource attributes" // +kubebuilder:validation:XValidation:rule="!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))",message="a header name cannot appear in both headers and sensitiveHeaders" // //nolint:lll // CEL validation rules exceed line length limit -type MCPTelemetryConfigSpec struct { - telemetry.Config `json:",inline"` // nolint:revive +type MCPTelemetryOTelConfig struct { + // Enabled controls whether OpenTelemetry is enabled + // +kubebuilder:default=false + // +optional + Enabled bool `json:"enabled,omitempty"` + + // Endpoint is the OTLP endpoint URL for tracing and metrics + // +optional + Endpoint string `json:"endpoint,omitempty"` + + // Insecure indicates whether to use HTTP instead of HTTPS for the OTLP endpoint + // +kubebuilder:default=false + // +optional + Insecure bool `json:"insecure,omitempty"` + + // Headers contains authentication headers for the OTLP endpoint. + // For secret-backed credentials, use sensitiveHeaders instead. + // +optional + Headers map[string]string `json:"headers,omitempty"` // SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets. // Use this for credential headers (e.g., API keys, bearer tokens) instead of // embedding secrets in the headers field. // +optional SensitiveHeaders []SensitiveHeader `json:"sensitiveHeaders,omitempty"` + + // ResourceAttributes contains custom resource attributes to be added to all telemetry signals. + // These become OTel resource attributes (e.g., deployment.environment, service.namespace). + // Note: service.name is intentionally excluded — it is set per-server via + // MCPTelemetryConfigReference.ServiceName. + // +optional + ResourceAttributes map[string]string `json:"resourceAttributes,omitempty"` + + // Metrics defines OpenTelemetry metrics-specific configuration + // +optional + Metrics *OpenTelemetryMetricsConfig `json:"metrics,omitempty"` + + // Tracing defines OpenTelemetry tracing configuration + // +optional + Tracing *OpenTelemetryTracingConfig `json:"tracing,omitempty"` + + // UseLegacyAttributes controls whether legacy attribute names are emitted alongside + // the new MCP OTEL semantic convention names. Defaults to true for backward compatibility. + // This will change to false in a future release and eventually be removed. + // +kubebuilder:default=true + // +optional + UseLegacyAttributes bool `json:"useLegacyAttributes"` +} + +// MCPTelemetryConfigSpec defines the desired state of MCPTelemetryConfig. +// The spec uses a nested structure with openTelemetry and prometheus sub-objects +// for clear separation of concerns. +type MCPTelemetryConfigSpec struct { + // OpenTelemetry defines OpenTelemetry configuration (OTLP endpoint, tracing, metrics) + // +optional + OpenTelemetry *MCPTelemetryOTelConfig `json:"openTelemetry,omitempty"` + + // Prometheus defines Prometheus-specific configuration + // +optional + Prometheus *PrometheusConfig `json:"prometheus,omitempty"` +} + +// WorkloadReference identifies a Kubernetes workload that references a shared config resource. +type WorkloadReference struct { + // Kind is the resource kind (e.g., "MCPServer") + Kind string `json:"kind"` + + // Namespace is the resource namespace + Namespace string `json:"namespace"` + + // Name is the resource name + Name string `json:"name"` } // MCPTelemetryConfigStatus defines the observed state of MCPTelemetryConfig @@ -59,17 +121,18 @@ type MCPTelemetryConfigStatus struct { // +optional ConfigHash string `json:"configHash,omitempty"` - // ReferencingServers is a list of MCPServer resources that reference this MCPTelemetryConfig + // ReferencingWorkloads lists workloads that reference this MCPTelemetryConfig // +optional - ReferencingServers []string `json:"referencingServers,omitempty"` + ReferencingWorkloads []WorkloadReference `json:"referencingWorkloads,omitempty"` } // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:shortName=mcpotel,categories=toolhive -// +kubebuilder:printcolumn:name="Endpoint",type=string,JSONPath=`.spec.endpoint` +// +kubebuilder:printcolumn:name="Endpoint",type=string,JSONPath=`.spec.openTelemetry.endpoint` // +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=='Valid')].status` -// +kubebuilder:printcolumn:name="References",type=string,JSONPath=`.status.referencingServers` +// +kubebuilder:printcolumn:name="Tracing",type=boolean,JSONPath=`.spec.openTelemetry.tracing.enabled` +// +kubebuilder:printcolumn:name="Metrics",type=boolean,JSONPath=`.spec.openTelemetry.metrics.enabled` // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` // MCPTelemetryConfig is the Schema for the mcptelemetryconfigs API. @@ -115,33 +178,26 @@ type MCPTelemetryConfigReference struct { // CEL catches issues at API admission time, but this method also validates // stored objects to catch any that bypassed CEL or were stored before CEL rules were added. func (r *MCPTelemetryConfig) Validate() error { - if err := r.validateCLIOnlyFields(); err != nil { - return err - } return r.validateSensitiveHeaders() } -// validateCLIOnlyFields rejects CLI-only fields that are not applicable to CRD-managed telemetry. -func (r *MCPTelemetryConfig) validateCLIOnlyFields() error { - if len(r.Spec.EnvironmentVariables) > 0 { - return fmt.Errorf("environmentVariables is a CLI-only field and cannot be set in MCPTelemetryConfig") - } - return nil -} - // validateSensitiveHeaders validates sensitive header entries and checks for overlap with plaintext headers. func (r *MCPTelemetryConfig) validateSensitiveHeaders() error { - for i, sh := range r.Spec.SensitiveHeaders { + if r.Spec.OpenTelemetry == nil { + return nil + } + otel := r.Spec.OpenTelemetry + for i, sh := range otel.SensitiveHeaders { if sh.Name == "" { - return fmt.Errorf("sensitiveHeaders[%d].name must not be empty", i) + return fmt.Errorf("openTelemetry.sensitiveHeaders[%d].name must not be empty", i) } if sh.SecretKeyRef.Name == "" { - return fmt.Errorf("sensitiveHeaders[%d].secretKeyRef.name must not be empty", i) + return fmt.Errorf("openTelemetry.sensitiveHeaders[%d].secretKeyRef.name must not be empty", i) } if sh.SecretKeyRef.Key == "" { - return fmt.Errorf("sensitiveHeaders[%d].secretKeyRef.key must not be empty", i) + return fmt.Errorf("openTelemetry.sensitiveHeaders[%d].secretKeyRef.key must not be empty", i) } - if _, exists := r.Spec.Headers[sh.Name]; exists { + if _, exists := otel.Headers[sh.Name]; exists { return fmt.Errorf("header %q appears in both headers and sensitiveHeaders", sh.Name) } } diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go index 1f0717bb8c..d60bc4fd13 100644 --- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -1804,11 +1804,15 @@ func (in *MCPTelemetryConfigReference) DeepCopy() *MCPTelemetryConfigReference { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MCPTelemetryConfigSpec) DeepCopyInto(out *MCPTelemetryConfigSpec) { *out = *in - in.Config.DeepCopyInto(&out.Config) - if in.SensitiveHeaders != nil { - in, out := &in.SensitiveHeaders, &out.SensitiveHeaders - *out = make([]SensitiveHeader, len(*in)) - copy(*out, *in) + if in.OpenTelemetry != nil { + in, out := &in.OpenTelemetry, &out.OpenTelemetry + *out = new(MCPTelemetryOTelConfig) + (*in).DeepCopyInto(*out) + } + if in.Prometheus != nil { + in, out := &in.Prometheus, &out.Prometheus + *out = new(PrometheusConfig) + **out = **in } } @@ -1832,9 +1836,9 @@ func (in *MCPTelemetryConfigStatus) DeepCopyInto(out *MCPTelemetryConfigStatus) (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.ReferencingServers != nil { - in, out := &in.ReferencingServers, &out.ReferencingServers - *out = make([]string, len(*in)) + if in.ReferencingWorkloads != nil { + in, out := &in.ReferencingWorkloads, &out.ReferencingWorkloads + *out = make([]WorkloadReference, len(*in)) copy(*out, *in) } } @@ -1849,6 +1853,50 @@ func (in *MCPTelemetryConfigStatus) DeepCopy() *MCPTelemetryConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MCPTelemetryOTelConfig) DeepCopyInto(out *MCPTelemetryOTelConfig) { + *out = *in + if in.Headers != nil { + in, out := &in.Headers, &out.Headers + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.SensitiveHeaders != nil { + in, out := &in.SensitiveHeaders, &out.SensitiveHeaders + *out = make([]SensitiveHeader, len(*in)) + copy(*out, *in) + } + if in.ResourceAttributes != nil { + in, out := &in.ResourceAttributes, &out.ResourceAttributes + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = new(OpenTelemetryMetricsConfig) + **out = **in + } + if in.Tracing != nil { + in, out := &in.Tracing, &out.Tracing + *out = new(OpenTelemetryTracingConfig) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MCPTelemetryOTelConfig. +func (in *MCPTelemetryOTelConfig) DeepCopy() *MCPTelemetryOTelConfig { + if in == nil { + return nil + } + out := new(MCPTelemetryOTelConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MCPToolConfig) DeepCopyInto(out *MCPToolConfig) { *out = *in diff --git a/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go b/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go index fae5d34b53..a6dba2076d 100644 --- a/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go +++ b/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go @@ -7,7 +7,6 @@ import ( "context" "fmt" "slices" - "sort" "time" "k8s.io/apimachinery/pkg/api/errors" @@ -108,16 +107,16 @@ func (r *MCPTelemetryConfigReconciler) Reconcile(ctx context.Context, req ctrl.R // Calculate the hash of the current configuration configHash := r.calculateConfigHash(telemetryConfig.Spec) - // Track referencing MCPServers - referencingServers, err := r.findReferencingServers(ctx, telemetryConfig) + // Track referencing workloads + referencingWorkloads, err := r.findReferencingWorkloads(ctx, telemetryConfig) if err != nil { - logger.Error(err, "Failed to find referencing MCPServers") + logger.Error(err, "Failed to find referencing workloads") return ctrl.Result{}, err } // Check what changed hashChanged := telemetryConfig.Status.ConfigHash != configHash - refsChanged := !slices.Equal(telemetryConfig.Status.ReferencingServers, referencingServers) + refsChanged := !workloadRefsEqual(telemetryConfig.Status.ReferencingWorkloads, referencingWorkloads) needsUpdate := hashChanged || refsChanged || conditionChanged if hashChanged { @@ -129,7 +128,7 @@ func (r *MCPTelemetryConfigReconciler) Reconcile(ctx context.Context, req ctrl.R if needsUpdate { telemetryConfig.Status.ConfigHash = configHash telemetryConfig.Status.ObservedGeneration = telemetryConfig.Generation - telemetryConfig.Status.ReferencingServers = referencingServers + telemetryConfig.Status.ReferencingWorkloads = referencingWorkloads if err := r.Status().Update(ctx, telemetryConfig); err != nil { logger.Error(err, "Failed to update MCPTelemetryConfig status") @@ -142,7 +141,7 @@ func (r *MCPTelemetryConfigReconciler) Reconcile(ctx context.Context, req ctrl.R // SetupWithManager sets up the controller with the Manager. func (r *MCPTelemetryConfigReconciler) SetupWithManager(mgr ctrl.Manager) error { - // Watch MCPServer changes to update ReferencingServers status + // Watch MCPServer changes to update ReferencingWorkloads status mcpServerHandler := handler.EnqueueRequestsFromMapFunc( func(_ context.Context, obj client.Object) []reconcile.Request { mcpServer, ok := obj.(*mcpv1alpha1.MCPServer) @@ -187,15 +186,19 @@ func (r *MCPTelemetryConfigReconciler) handleDeletion( return ctrl.Result{}, nil } - // Check for referencing servers before allowing deletion - referencingServers, err := r.findReferencingServers(ctx, telemetryConfig) + // Check for referencing workloads before allowing deletion + referencingWorkloads, err := r.findReferencingWorkloads(ctx, telemetryConfig) if err != nil { - logger.Error(err, "Failed to check referencing servers during deletion") + logger.Error(err, "Failed to check referencing workloads during deletion") return ctrl.Result{}, err } - if len(referencingServers) > 0 { - msg := fmt.Sprintf("cannot delete: still referenced by MCPServer(s): %v", referencingServers) + if len(referencingWorkloads) > 0 { + names := make([]string, 0, len(referencingWorkloads)) + for _, ref := range referencingWorkloads { + names = append(names, fmt.Sprintf("%s/%s", ref.Namespace, ref.Name)) + } + msg := fmt.Sprintf("cannot delete: still referenced by MCPServer(s): %v", names) logger.Info(msg, "telemetryConfig", telemetryConfig.Name) meta.SetStatusCondition(&telemetryConfig.Status.Conditions, metav1.Condition{ Type: "DeletionBlocked", @@ -220,24 +223,49 @@ func (r *MCPTelemetryConfigReconciler) handleDeletion( return ctrl.Result{}, nil } -// findReferencingServers returns a sorted list of MCPServer names in the same namespace +// findReferencingWorkloads returns a sorted list of workload references in the same namespace // that reference this MCPTelemetryConfig via TelemetryConfigRef. -func (r *MCPTelemetryConfigReconciler) findReferencingServers( +func (r *MCPTelemetryConfigReconciler) findReferencingWorkloads( ctx context.Context, telemetryConfig *mcpv1alpha1.MCPTelemetryConfig, -) ([]string, error) { +) ([]mcpv1alpha1.WorkloadReference, error) { mcpServerList := &mcpv1alpha1.MCPServerList{} if err := r.List(ctx, mcpServerList, client.InNamespace(telemetryConfig.Namespace)); err != nil { return nil, fmt.Errorf("failed to list MCPServers: %w", err) } - var refs []string + var refs []mcpv1alpha1.WorkloadReference for _, server := range mcpServerList.Items { if server.Spec.TelemetryConfigRef != nil && server.Spec.TelemetryConfigRef.Name == telemetryConfig.Name { - refs = append(refs, server.Name) + refs = append(refs, mcpv1alpha1.WorkloadReference{ + Kind: "MCPServer", + Namespace: server.Namespace, + Name: server.Name, + }) } } - sort.Strings(refs) + slices.SortFunc(refs, func(a, b mcpv1alpha1.WorkloadReference) int { + if a.Namespace != b.Namespace { + if a.Namespace < b.Namespace { + return -1 + } + return 1 + } + if a.Name < b.Name { + return -1 + } + if a.Name > b.Name { + return 1 + } + return 0 + }) return refs, nil } + +// workloadRefsEqual compares two WorkloadReference slices for equality. +func workloadRefsEqual(a, b []mcpv1alpha1.WorkloadReference) bool { + return slices.EqualFunc(a, b, func(x, y mcpv1alpha1.WorkloadReference) bool { + return x.Kind == y.Kind && x.Namespace == y.Namespace && x.Name == y.Name + }) +} diff --git a/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go b/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go index 9b381615e5..9c37e73899 100644 --- a/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go +++ b/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go @@ -34,7 +34,7 @@ func TestMCPTelemetryConfigReconciler_calculateConfigHash(t *testing.T) { name: "telemetry spec with headers", spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { s := newTelemetrySpec("https://otel-collector:4317", true, true) - s.Headers = map[string]string{"X-Team": "platform"} + s.OpenTelemetry.Headers = map[string]string{"X-Team": "platform"} return s }(), }, @@ -169,7 +169,7 @@ func TestMCPTelemetryConfigReconciler_ValidationRecovery(t *testing.T) { require.NoError(t, mcpv1alpha1.AddToScheme(scheme)) require.NoError(t, corev1.AddToScheme(scheme)) - // Start with invalid config: environmentVariables is CLI-only + // Start with invalid config: empty sensitive header name telemetryConfig := &mcpv1alpha1.MCPTelemetryConfig{ ObjectMeta: metav1.ObjectMeta{ Name: "recovery-config", @@ -179,7 +179,10 @@ func TestMCPTelemetryConfigReconciler_ValidationRecovery(t *testing.T) { }, Spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { s := newTelemetrySpec("https://otel-collector:4317", true, false) - s.EnvironmentVariables = []string{"OTEL_EXPORTER=grpc"} + s.OpenTelemetry.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{{ + Name: "", + SecretKeyRef: mcpv1alpha1.SecretKeyRef{Name: "s", Key: "k"}, + }} return s }(), } @@ -220,8 +223,8 @@ func TestMCPTelemetryConfigReconciler_ValidationRecovery(t *testing.T) { require.True(t, foundFalse, "Should have Valid=False condition") assert.Empty(t, invalidConfig.Status.ConfigHash, "Hash should not be set for invalid config") - // Fix the config by removing environmentVariables - invalidConfig.Spec.EnvironmentVariables = nil + // Fix the config by removing invalid sensitive headers + invalidConfig.Spec.OpenTelemetry.SensitiveHeaders = nil invalidConfig.Generation = 2 err = fakeClient.Update(ctx, &invalidConfig) require.NoError(t, err) @@ -357,7 +360,7 @@ func TestMCPTelemetryConfigReconciler_ConfigChangeTriggersHashUpdate(t *testing. firstHash := updatedConfig.Status.ConfigHash // Update the config spec (simulate a change) - updatedConfig.Spec.Endpoint = "https://new-collector:4317" + updatedConfig.Spec.OpenTelemetry.Endpoint = "https://new-collector:4317" updatedConfig.Generation = 2 err = fakeClient.Update(ctx, &updatedConfig) require.NoError(t, err) @@ -384,7 +387,7 @@ func TestMCPTelemetryConfigReconciler_ValidationFailureSetsCondition(t *testing. require.NoError(t, mcpv1alpha1.AddToScheme(scheme)) require.NoError(t, corev1.AddToScheme(scheme)) - // Invalid config: environmentVariables is CLI-only + // Invalid config: empty sensitive header name telemetryConfig := &mcpv1alpha1.MCPTelemetryConfig{ ObjectMeta: metav1.ObjectMeta{ Name: "invalid-config", @@ -394,7 +397,10 @@ func TestMCPTelemetryConfigReconciler_ValidationFailureSetsCondition(t *testing. }, Spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { s := newTelemetrySpec("https://otel-collector:4317", true, false) - s.EnvironmentVariables = []string{"OTEL_EXPORTER=grpc"} + s.OpenTelemetry.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{{ + Name: "", + SecretKeyRef: mcpv1alpha1.SecretKeyRef{Name: "s", Key: "k"}, + }} return s }(), } @@ -458,7 +464,7 @@ func TestMCPTelemetryConfig_Validate(t *testing.T) { config: &mcpv1alpha1.MCPTelemetryConfig{ Spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { s := newTelemetrySpec("https://otel-collector:4317", true, false) - s.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{ + s.OpenTelemetry.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{ { Name: "Authorization", SecretKeyRef: mcpv1alpha1.SecretKeyRef{ @@ -472,24 +478,13 @@ func TestMCPTelemetryConfig_Validate(t *testing.T) { }, expectError: false, }, - { - name: "invalid environmentVariables set", - config: &mcpv1alpha1.MCPTelemetryConfig{ - Spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { - s := newTelemetrySpec("https://otel-collector:4317", true, false) - s.EnvironmentVariables = []string{"OTEL_EXPORTER=grpc"} - return s - }(), - }, - expectError: true, - }, { name: "invalid overlapping headers", config: &mcpv1alpha1.MCPTelemetryConfig{ Spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { s := newTelemetrySpec("https://otel-collector:4317", true, false) - s.Headers = map[string]string{"Authorization": "Bearer token"} - s.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{ + s.OpenTelemetry.Headers = map[string]string{"Authorization": "Bearer token"} + s.OpenTelemetry.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{ { Name: "Authorization", SecretKeyRef: mcpv1alpha1.SecretKeyRef{ @@ -508,7 +503,7 @@ func TestMCPTelemetryConfig_Validate(t *testing.T) { config: &mcpv1alpha1.MCPTelemetryConfig{ Spec: func() mcpv1alpha1.MCPTelemetryConfigSpec { s := newTelemetrySpec("https://otel-collector:4317", true, false) - s.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{ + s.OpenTelemetry.SensitiveHeaders = []mcpv1alpha1.SensitiveHeader{ { Name: "Authorization", SecretKeyRef: mcpv1alpha1.SecretKeyRef{ @@ -694,8 +689,11 @@ func TestMCPTelemetryConfigReconciler_ReferenceTracking(t *testing.T) { err = fakeClient.Get(ctx, req.NamespacedName, &updated) require.NoError(t, err) - // ReferencingServers should list server-a and server-b (sorted), but not server-c - assert.Equal(t, []string{"server-a", "server-b"}, updated.Status.ReferencingServers) + // ReferencingWorkloads should list server-a and server-b (sorted), but not server-c + assert.Equal(t, []mcpv1alpha1.WorkloadReference{ + {Kind: "MCPServer", Namespace: "default", Name: "server-a"}, + {Kind: "MCPServer", Namespace: "default", Name: "server-b"}, + }, updated.Status.ReferencingWorkloads) } func TestMCPTelemetryConfigReconciler_handleDeletion_BlocksWhenReferenced(t *testing.T) { @@ -843,9 +841,12 @@ func TestMCPTelemetryConfigReconciler_handleDeletion_NoFinalizerIsNoOp(t *testin // newTelemetrySpec creates a basic MCPTelemetryConfigSpec for testing. func newTelemetrySpec(endpoint string, tracing, metrics bool) mcpv1alpha1.MCPTelemetryConfigSpec { - spec := mcpv1alpha1.MCPTelemetryConfigSpec{} - spec.Endpoint = endpoint - spec.TracingEnabled = tracing - spec.MetricsEnabled = metrics - return spec + return mcpv1alpha1.MCPTelemetryConfigSpec{ + OpenTelemetry: &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: endpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: tracing}, + Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{Enabled: metrics}, + }, + } } diff --git a/cmd/thv-operator/pkg/controllerutil/telemetry.go b/cmd/thv-operator/pkg/controllerutil/telemetry.go index 0601184d8a..cafcad18bc 100644 --- a/cmd/thv-operator/pkg/controllerutil/telemetry.go +++ b/cmd/thv-operator/pkg/controllerutil/telemetry.go @@ -38,19 +38,21 @@ func GenerateOpenTelemetryEnvVarsFromRef( // Inject sensitive headers as env vars so the proxy runner can merge them // into the OTLP exporter at startup. Each header becomes: // TOOLHIVE_OTEL_HEADER_= - for _, sh := range telemetryConfig.Spec.SensitiveHeaders { - envVarName := "TOOLHIVE_OTEL_HEADER_" + normalizeHeaderEnvVarName(sh.Name) - envVars = append(envVars, corev1.EnvVar{ - Name: envVarName, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: sh.SecretKeyRef.Name, + if telemetryConfig.Spec.OpenTelemetry != nil { + for _, sh := range telemetryConfig.Spec.OpenTelemetry.SensitiveHeaders { + envVarName := "TOOLHIVE_OTEL_HEADER_" + normalizeHeaderEnvVarName(sh.Name) + envVars = append(envVars, corev1.EnvVar{ + Name: envVarName, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: sh.SecretKeyRef.Name, + }, + Key: sh.SecretKeyRef.Key, }, - Key: sh.SecretKeyRef.Key, }, - }, - }) + }) + } } return envVars diff --git a/cmd/thv-operator/pkg/controllerutil/telemetry_test.go b/cmd/thv-operator/pkg/controllerutil/telemetry_test.go index d8123f4cdb..0f82e95533 100644 --- a/cmd/thv-operator/pkg/controllerutil/telemetry_test.go +++ b/cmd/thv-operator/pkg/controllerutil/telemetry_test.go @@ -83,19 +83,21 @@ func TestGenerateOpenTelemetryEnvVarsFromRef(t *testing.T) { name: "sensitive headers produce env vars with SecretKeyRef", telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ Spec: mcpv1alpha1.MCPTelemetryConfigSpec{ - SensitiveHeaders: []mcpv1alpha1.SensitiveHeader{ - { - Name: "Authorization", - SecretKeyRef: mcpv1alpha1.SecretKeyRef{ - Name: "otel-secret", - Key: "auth-token", + OpenTelemetry: &mcpv1alpha1.MCPTelemetryOTelConfig{ + SensitiveHeaders: []mcpv1alpha1.SensitiveHeader{ + { + Name: "Authorization", + SecretKeyRef: mcpv1alpha1.SecretKeyRef{ + Name: "otel-secret", + Key: "auth-token", + }, }, - }, - { - Name: "X-API-Key", - SecretKeyRef: mcpv1alpha1.SecretKeyRef{ - Name: "api-secrets", - Key: "api-key", + { + Name: "X-API-Key", + SecretKeyRef: mcpv1alpha1.SecretKeyRef{ + Name: "api-secrets", + Key: "api-key", + }, }, }, }, diff --git a/cmd/thv-operator/pkg/runconfig/telemetry_test.go b/cmd/thv-operator/pkg/runconfig/telemetry_test.go index 4c3adaa90e..c8f507e6b7 100644 --- a/cmd/thv-operator/pkg/runconfig/telemetry_test.go +++ b/cmd/thv-operator/pkg/runconfig/telemetry_test.go @@ -13,7 +13,6 @@ import ( mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" "github.com/stacklok/toolhive/pkg/runner" - "github.com/stacklok/toolhive/pkg/telemetry" ) const ( @@ -270,11 +269,11 @@ func TestAddMCPTelemetryConfigRefOptions(t *testing.T) { { name: "valid spec adds runner option", spec: &mcpv1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "https://otel-collector:4317", - TracingEnabled: true, - MetricsEnabled: true, - SamplingRate: "0.1", + OpenTelemetry: &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "https://otel-collector:4317", + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true, SamplingRate: "0.1"}, + Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{Enabled: true}, }, }, serviceNameOverride: "my-server-service", @@ -313,9 +312,10 @@ func TestAddMCPTelemetryConfigRefOptions_NilOptions(t *testing.T) { t.Parallel() spec := &mcpv1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "otel-collector:4317", - TracingEnabled: true, + OpenTelemetry: &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "otel-collector:4317", + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, }, } diff --git a/cmd/thv-operator/pkg/spectoconfig/telemetry.go b/cmd/thv-operator/pkg/spectoconfig/telemetry.go index 1c7d932bd7..a2249b03b6 100644 --- a/cmd/thv-operator/pkg/spectoconfig/telemetry.go +++ b/cmd/thv-operator/pkg/spectoconfig/telemetry.go @@ -102,7 +102,8 @@ func ConvertTelemetryConfig( } // NormalizeMCPTelemetryConfig converts an MCPTelemetryConfigSpec to a normalized telemetry.Config. -// It applies the per-server ServiceName override from the reference, then delegates to +// It maps the nested CRD structure (openTelemetry/prometheus) to a flat telemetry.Config, +// applies the per-server ServiceName override from the reference, then delegates to // NormalizeTelemetryConfig for endpoint normalization and service name defaulting. func NormalizeMCPTelemetryConfig( spec *v1alpha1.MCPTelemetryConfigSpec, @@ -113,15 +114,37 @@ func NormalizeMCPTelemetryConfig( return nil } - // Copy the embedded config to avoid mutating the original - config := spec.Config + config := &telemetry.Config{} + + // Map nested OpenTelemetry fields to flat telemetry.Config + if spec.OpenTelemetry != nil { + otel := spec.OpenTelemetry + config.Endpoint = otel.Endpoint + config.Insecure = otel.Insecure + config.Headers = otel.Headers + config.CustomAttributes = otel.ResourceAttributes + config.UseLegacyAttributes = otel.UseLegacyAttributes + + if otel.Tracing != nil { + config.TracingEnabled = otel.Tracing.Enabled + config.SamplingRate = otel.Tracing.SamplingRate + } + if otel.Metrics != nil { + config.MetricsEnabled = otel.Metrics.Enabled + } + } + + // Map Prometheus configuration + if spec.Prometheus != nil { + config.EnablePrometheusMetricsPath = spec.Prometheus.Enabled + } // Apply per-server service name override from the TelemetryConfigRef if serviceNameOverride != "" { config.ServiceName = serviceNameOverride } - return NormalizeTelemetryConfig(&config, defaultServiceName) + return NormalizeTelemetryConfig(config, defaultServiceName) } // NormalizeTelemetryConfig applies runtime normalization to a telemetry.Config. diff --git a/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go b/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go index 650277eba6..7a1d5534b2 100644 --- a/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go +++ b/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go @@ -202,9 +202,9 @@ func TestNormalizeMCPTelemetryConfig(t *testing.T) { { name: "service name override takes precedence", spec: &v1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "https://otel-collector:4317", - ServiceName: "spec-service-name", + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "https://otel-collector:4317", }, }, serviceNameOverride: "per-server-override", @@ -217,9 +217,9 @@ func TestNormalizeMCPTelemetryConfig(t *testing.T) { { name: "empty override falls through to defaultServiceName", spec: &v1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "otel-collector:4317", - ServiceName: "", + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "otel-collector:4317", }, }, serviceNameOverride: "", @@ -232,13 +232,13 @@ func TestNormalizeMCPTelemetryConfig(t *testing.T) { { name: "endpoint normalization strips http:// prefix", spec: &v1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "http://collector.monitoring:4317", - ServiceName: "my-service", - TracingEnabled: true, + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "http://collector.monitoring:4317", + Tracing: &v1alpha1.OpenTelemetryTracingConfig{Enabled: true}, }, }, - serviceNameOverride: "", + serviceNameOverride: "my-service", defaultServiceName: "fallback", expected: &telemetry.Config{ Endpoint: "collector.monitoring:4317", @@ -249,12 +249,12 @@ func TestNormalizeMCPTelemetryConfig(t *testing.T) { { name: "endpoint normalization strips https:// prefix", spec: &v1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "https://secure-collector:4317", - ServiceName: "my-service", + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "https://secure-collector:4317", }, }, - serviceNameOverride: "", + serviceNameOverride: "my-service", defaultServiceName: "fallback", expected: &telemetry.Config{ Endpoint: "secure-collector:4317", @@ -262,18 +262,18 @@ func TestNormalizeMCPTelemetryConfig(t *testing.T) { }, }, { - name: "spec ServiceName used when no override and not empty", + name: "default service name used when no override", spec: &v1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "collector:4317", - ServiceName: "spec-level-name", + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "collector:4317", }, }, serviceNameOverride: "", defaultServiceName: "fallback", expected: &telemetry.Config{ Endpoint: "collector:4317", - ServiceName: "spec-level-name", + ServiceName: "fallback", }, }, } @@ -297,20 +297,18 @@ func TestNormalizeMCPTelemetryConfig_DoesNotModifyInput(t *testing.T) { t.Parallel() spec := &v1alpha1.MCPTelemetryConfigSpec{ - Config: telemetry.Config{ - Endpoint: "https://otel-collector:4317", - ServiceName: "", + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "https://otel-collector:4317", }, } - originalEndpoint := spec.Endpoint - originalServiceName := spec.ServiceName + originalEndpoint := spec.OpenTelemetry.Endpoint result := NormalizeMCPTelemetryConfig(spec, "override-name", "default-name") // Verify the original spec was not modified - assert.Equal(t, originalEndpoint, spec.Endpoint, "Input endpoint should not be modified") - assert.Equal(t, originalServiceName, spec.ServiceName, "Input ServiceName should not be modified") + assert.Equal(t, originalEndpoint, spec.OpenTelemetry.Endpoint, "Input endpoint should not be modified") // Verify result has normalized values require.NotNil(t, result) diff --git a/cmd/thv-operator/test-integration/mcp-server/mcpserver_runconfig_integration_test.go b/cmd/thv-operator/test-integration/mcp-server/mcpserver_runconfig_integration_test.go index 34565989d4..223e7fa395 100644 --- a/cmd/thv-operator/test-integration/mcp-server/mcpserver_runconfig_integration_test.go +++ b/cmd/thv-operator/test-integration/mcp-server/mcpserver_runconfig_integration_test.go @@ -546,12 +546,14 @@ var _ = Describe("RunConfig ConfigMap Integration Tests", func() { Namespace: namespace, }, } - telCfg.Spec.Endpoint = "otel-collector:4317" - telCfg.Spec.Insecure = true - telCfg.Spec.TracingEnabled = true - telCfg.Spec.MetricsEnabled = true - telCfg.Spec.SamplingRate = "0.1" - telCfg.Spec.EnablePrometheusMetricsPath = true + telCfg.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "otel-collector:4317", + Insecure: true, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true, SamplingRate: "0.1"}, + Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{Enabled: true}, + } + telCfg.Spec.Prometheus = &mcpv1alpha1.PrometheusConfig{Enabled: true} Expect(k8sClient.Create(ctx, telCfg)).To(Succeed()) defer k8sClient.Delete(ctx, telCfg) //nolint:errcheck @@ -630,8 +632,11 @@ var _ = Describe("RunConfig ConfigMap Integration Tests", func() { Namespace: namespace, }, } - telCfg.Spec.Endpoint = "otel-collector:4317" - telCfg.Spec.TracingEnabled = true + telCfg.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "otel-collector:4317", + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } Expect(k8sClient.Create(ctx, telCfg)).To(Succeed()) defer k8sClient.Delete(ctx, telCfg) //nolint:errcheck diff --git a/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go b/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go index 8cba404d74..48d4f6920b 100644 --- a/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go +++ b/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go @@ -28,9 +28,12 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Namespace: "default", }, } - telemetryConfig.Spec.Endpoint = testEndpoint - telemetryConfig.Spec.TracingEnabled = true - telemetryConfig.Spec.MetricsEnabled = true + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{Enabled: true}, + } Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) @@ -73,8 +76,11 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Namespace: "default", }, } - telemetryConfig.Spec.Endpoint = testEndpoint - telemetryConfig.Spec.TracingEnabled = true + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) @@ -100,7 +106,7 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Namespace: telemetryConfig.Namespace, }, fetched)).To(Succeed()) - fetched.Spec.Endpoint = "https://new-collector:4317" + fetched.Spec.OpenTelemetry.Endpoint = "https://new-collector:4317" Expect(k8sClient.Update(ctx, fetched)).To(Succeed()) // Verify hash changed @@ -124,8 +130,11 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Namespace: "default", }, } - telemetryConfig.Spec.Endpoint = testEndpoint - telemetryConfig.Spec.TracingEnabled = true + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) @@ -169,8 +178,11 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Namespace: "default", }, } - telemetryConfig.Spec.Endpoint = testEndpoint - telemetryConfig.Spec.TracingEnabled = true + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) @@ -200,7 +212,7 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Expect(k8sClient.Create(ctx, server)).To(Succeed()) // The MCPServer watch should trigger a reconciliation of the MCPTelemetryConfig. - // Verify ReferencingServers is updated to include our server. + // Verify ReferencingWorkloads is updated to include our server. Eventually(func() []string { fetched := &mcpv1alpha1.MCPTelemetryConfig{} err := k8sClient.Get(ctx, types.NamespacedName{ @@ -210,7 +222,11 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { if err != nil { return nil } - return fetched.Status.ReferencingServers + names := make([]string, 0, len(fetched.Status.ReferencingWorkloads)) + for _, ref := range fetched.Status.ReferencingWorkloads { + names = append(names, ref.Name) + } + return names }, timeout, interval).Should(ContainElement("server-ref-tracking")) }) @@ -222,8 +238,11 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { Namespace: "default", }, } - telemetryConfig.Spec.Endpoint = testEndpoint - telemetryConfig.Spec.TracingEnabled = true + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) @@ -260,7 +279,7 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { } Expect(k8sClient.Create(ctx, server)).To(Succeed()) - // Wait for ReferencingServers to be populated + // Wait for ReferencingWorkloads to be populated Eventually(func() []string { fetched := &mcpv1alpha1.MCPTelemetryConfig{} err := k8sClient.Get(ctx, types.NamespacedName{ @@ -270,7 +289,11 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { if err != nil { return nil } - return fetched.Status.ReferencingServers + names := make([]string, 0, len(fetched.Status.ReferencingWorkloads)) + for _, ref := range fetched.Status.ReferencingWorkloads { + names = append(names, ref.Name) + } + return names }, timeout, interval).Should(ContainElement("server-deletion-blocker")) // Attempt to delete the config — the API call succeeds (sets DeletionTimestamp) diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 03ddb80010..0ae849dc83 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -19,15 +19,15 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: - - jsonPath: .spec.endpoint + - jsonPath: .spec.openTelemetry.endpoint name: Endpoint type: string - - jsonPath: .status.conditions[?(@.type=='Valid')].status - name: Ready - type: string - - jsonPath: .status.referencingServers - name: References - type: string + - jsonPath: .spec.openTelemetry.tracing.enabled + name: Tracing + type: boolean + - jsonPath: .spec.openTelemetry.metrics.enabled + name: Metrics + type: boolean - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -60,130 +60,119 @@ spec: spec: description: |- MCPTelemetryConfigSpec defines the desired state of MCPTelemetryConfig. - It embeds telemetry.Config from pkg/telemetry to eliminate the conversion - layer between CRD and application types. The environmentVariables field is - CLI-only and rejected by CEL validation; customAttributes is allowed for - setting shared OTel resource attributes (e.g., deployment.environment). + The spec uses a nested structure with openTelemetry and prometheus sub-objects + for clear separation of concerns. properties: - customAttributes: - additionalProperties: - type: string - description: |- - CustomAttributes contains custom resource attributes to be added to all telemetry signals. - These are parsed from CLI flags (--otel-custom-attributes) or environment variables - (OTEL_RESOURCE_ATTRIBUTES) as key=value pairs. - type: object - enablePrometheusMetricsPath: - default: false - description: |- - EnablePrometheusMetricsPath controls whether to expose Prometheus-style /metrics endpoint. - The metrics are served on the main transport port at /metrics. - This is separate from OTLP metrics which are sent to the Endpoint. - type: boolean - endpoint: - description: Endpoint is the OTLP endpoint URL - type: string - environmentVariables: - description: |- - EnvironmentVariables is a list of environment variable names that should be - included in telemetry spans as attributes. Only variables in this list will - be read from the host machine and included in spans for observability. - Example: ["NODE_ENV", "DEPLOYMENT_ENV", "SERVICE_VERSION"] - items: - type: string - type: array - headers: - additionalProperties: - type: string - description: Headers contains authentication headers for the OTLP - endpoint. - type: object - insecure: - default: false - description: Insecure indicates whether to use HTTP instead of HTTPS - for the OTLP endpoint. - type: boolean - metricsEnabled: - default: false - description: |- - MetricsEnabled controls whether OTLP metrics are enabled. - When false, OTLP metrics are not sent even if an endpoint is configured. - This is independent of EnablePrometheusMetricsPath. - type: boolean - samplingRate: - default: "0.05" - description: |- - SamplingRate is the trace sampling rate (0.0-1.0) as a string. - Only used when TracingEnabled is true. - Example: "0.05" for 5% sampling. - type: string - sensitiveHeaders: - description: |- - SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets. - Use this for credential headers (e.g., API keys, bearer tokens) instead of - embedding secrets in the headers field. - items: - description: |- - SensitiveHeader represents a header whose value is stored in a Kubernetes Secret. - This allows credential headers (e.g., API keys, bearer tokens) to be securely - referenced without embedding secrets inline in the MCPTelemetryConfig resource. - properties: - name: - description: Name is the header name (e.g., "Authorization", - "X-API-Key") - minLength: 1 + openTelemetry: + description: OpenTelemetry defines OpenTelemetry configuration (OTLP + endpoint, tracing, metrics) + properties: + enabled: + default: false + description: Enabled controls whether OpenTelemetry is enabled + type: boolean + endpoint: + description: Endpoint is the OTLP endpoint URL for tracing and + metrics + type: string + headers: + additionalProperties: type: string - secretKeyRef: - description: SecretKeyRef is a reference to a Kubernetes Secret - key containing the header value + description: |- + Headers contains authentication headers for the OTLP endpoint. + For secret-backed credentials, use sensitiveHeaders instead. + type: object + insecure: + default: false + description: Insecure indicates whether to use HTTP instead of + HTTPS for the OTLP endpoint + type: boolean + metrics: + description: Metrics defines OpenTelemetry metrics-specific configuration + properties: + enabled: + default: false + description: Enabled controls whether OTLP metrics are sent + type: boolean + type: object + resourceAttributes: + additionalProperties: + type: string + description: |- + ResourceAttributes contains custom resource attributes to be added to all telemetry signals. + These become OTel resource attributes (e.g., deployment.environment, service.namespace). + Note: service.name is intentionally excluded — it is set per-server via + MCPTelemetryConfigReference.ServiceName. + type: object + sensitiveHeaders: + description: |- + SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets. + Use this for credential headers (e.g., API keys, bearer tokens) instead of + embedding secrets in the headers field. + items: + description: |- + SensitiveHeader represents a header whose value is stored in a Kubernetes Secret. + This allows credential headers (e.g., API keys, bearer tokens) to be securely + referenced without embedding secrets inline in the MCPTelemetryConfig resource. properties: - key: - description: Key is the key within the secret - type: string name: - description: Name is the name of the secret + description: Name is the header name (e.g., "Authorization", + "X-API-Key") + minLength: 1 type: string + secretKeyRef: + description: SecretKeyRef is a reference to a Kubernetes + Secret key containing the header value + properties: + key: + description: Key is the key within the secret + type: string + name: + description: Name is the name of the secret + type: string + required: + - key + - name + type: object required: - - key - name + - secretKeyRef type: object - required: - - name - - secretKeyRef - type: object - type: array - serviceName: - description: |- - ServiceName is the service name for telemetry. - When omitted, defaults to the server name (e.g., VirtualMCPServer name). - type: string - serviceVersion: - description: |- - ServiceVersion is the service version for telemetry. - When omitted, defaults to the ToolHive version. - type: string - tracingEnabled: - default: false - description: |- - TracingEnabled controls whether distributed tracing is enabled. - When false, no tracer provider is created even if an endpoint is configured. - type: boolean - useLegacyAttributes: - default: true - description: |- - UseLegacyAttributes controls whether legacy (pre-MCP OTEL semconv) attribute names - are emitted alongside the new standard attribute names. When true, spans include both - old and new attribute names for backward compatibility with existing dashboards. - Currently defaults to true; this will change to false in a future release. - type: boolean + type: array + tracing: + description: Tracing defines OpenTelemetry tracing configuration + properties: + enabled: + default: false + description: Enabled controls whether OTLP tracing is sent + type: boolean + samplingRate: + default: "0.05" + description: SamplingRate is the trace sampling rate (0.0-1.0) + type: string + type: object + useLegacyAttributes: + default: true + description: |- + UseLegacyAttributes controls whether legacy attribute names are emitted alongside + the new MCP OTEL semantic convention names. Defaults to true for backward compatibility. + This will change to false in a future release and eventually be removed. + type: boolean + type: object + x-kubernetes-validations: + - message: a header name cannot appear in both headers and sensitiveHeaders + rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, + !(sh.name in self.headers))' + prometheus: + description: Prometheus defines Prometheus-specific configuration + properties: + enabled: + default: false + description: Enabled controls whether Prometheus metrics endpoint + is exposed + type: boolean + type: object type: object - x-kubernetes-validations: - - message: environmentVariables is a CLI-only field and cannot be set - in MCPTelemetryConfig; use customAttributes for resource attributes - rule: '!has(self.environmentVariables)' - - message: a header name cannot appear in both headers and sensitiveHeaders - rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, - !(sh.name in self.headers))' status: description: MCPTelemetryConfigStatus defines the observed state of MCPTelemetryConfig properties: @@ -254,11 +243,27 @@ spec: for this MCPTelemetryConfig. format: int64 type: integer - referencingServers: - description: ReferencingServers is a list of MCPServer resources that - reference this MCPTelemetryConfig + referencingWorkloads: + description: ReferencingWorkloads lists workloads that reference this + MCPTelemetryConfig items: - type: string + description: WorkloadReference identifies a Kubernetes workload + that references a shared config resource. + properties: + kind: + description: Kind is the resource kind (e.g., "MCPServer") + type: string + name: + description: Name is the resource name + type: string + namespace: + description: Namespace is the resource namespace + type: string + required: + - kind + - name + - namespace + type: object type: array type: object type: object diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 88756e96b3..56f3fe59cb 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -22,15 +22,15 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: - - jsonPath: .spec.endpoint + - jsonPath: .spec.openTelemetry.endpoint name: Endpoint type: string - - jsonPath: .status.conditions[?(@.type=='Valid')].status - name: Ready - type: string - - jsonPath: .status.referencingServers - name: References - type: string + - jsonPath: .spec.openTelemetry.tracing.enabled + name: Tracing + type: boolean + - jsonPath: .spec.openTelemetry.metrics.enabled + name: Metrics + type: boolean - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -63,130 +63,119 @@ spec: spec: description: |- MCPTelemetryConfigSpec defines the desired state of MCPTelemetryConfig. - It embeds telemetry.Config from pkg/telemetry to eliminate the conversion - layer between CRD and application types. The environmentVariables field is - CLI-only and rejected by CEL validation; customAttributes is allowed for - setting shared OTel resource attributes (e.g., deployment.environment). + The spec uses a nested structure with openTelemetry and prometheus sub-objects + for clear separation of concerns. properties: - customAttributes: - additionalProperties: - type: string - description: |- - CustomAttributes contains custom resource attributes to be added to all telemetry signals. - These are parsed from CLI flags (--otel-custom-attributes) or environment variables - (OTEL_RESOURCE_ATTRIBUTES) as key=value pairs. - type: object - enablePrometheusMetricsPath: - default: false - description: |- - EnablePrometheusMetricsPath controls whether to expose Prometheus-style /metrics endpoint. - The metrics are served on the main transport port at /metrics. - This is separate from OTLP metrics which are sent to the Endpoint. - type: boolean - endpoint: - description: Endpoint is the OTLP endpoint URL - type: string - environmentVariables: - description: |- - EnvironmentVariables is a list of environment variable names that should be - included in telemetry spans as attributes. Only variables in this list will - be read from the host machine and included in spans for observability. - Example: ["NODE_ENV", "DEPLOYMENT_ENV", "SERVICE_VERSION"] - items: - type: string - type: array - headers: - additionalProperties: - type: string - description: Headers contains authentication headers for the OTLP - endpoint. - type: object - insecure: - default: false - description: Insecure indicates whether to use HTTP instead of HTTPS - for the OTLP endpoint. - type: boolean - metricsEnabled: - default: false - description: |- - MetricsEnabled controls whether OTLP metrics are enabled. - When false, OTLP metrics are not sent even if an endpoint is configured. - This is independent of EnablePrometheusMetricsPath. - type: boolean - samplingRate: - default: "0.05" - description: |- - SamplingRate is the trace sampling rate (0.0-1.0) as a string. - Only used when TracingEnabled is true. - Example: "0.05" for 5% sampling. - type: string - sensitiveHeaders: - description: |- - SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets. - Use this for credential headers (e.g., API keys, bearer tokens) instead of - embedding secrets in the headers field. - items: - description: |- - SensitiveHeader represents a header whose value is stored in a Kubernetes Secret. - This allows credential headers (e.g., API keys, bearer tokens) to be securely - referenced without embedding secrets inline in the MCPTelemetryConfig resource. - properties: - name: - description: Name is the header name (e.g., "Authorization", - "X-API-Key") - minLength: 1 + openTelemetry: + description: OpenTelemetry defines OpenTelemetry configuration (OTLP + endpoint, tracing, metrics) + properties: + enabled: + default: false + description: Enabled controls whether OpenTelemetry is enabled + type: boolean + endpoint: + description: Endpoint is the OTLP endpoint URL for tracing and + metrics + type: string + headers: + additionalProperties: type: string - secretKeyRef: - description: SecretKeyRef is a reference to a Kubernetes Secret - key containing the header value + description: |- + Headers contains authentication headers for the OTLP endpoint. + For secret-backed credentials, use sensitiveHeaders instead. + type: object + insecure: + default: false + description: Insecure indicates whether to use HTTP instead of + HTTPS for the OTLP endpoint + type: boolean + metrics: + description: Metrics defines OpenTelemetry metrics-specific configuration + properties: + enabled: + default: false + description: Enabled controls whether OTLP metrics are sent + type: boolean + type: object + resourceAttributes: + additionalProperties: + type: string + description: |- + ResourceAttributes contains custom resource attributes to be added to all telemetry signals. + These become OTel resource attributes (e.g., deployment.environment, service.namespace). + Note: service.name is intentionally excluded — it is set per-server via + MCPTelemetryConfigReference.ServiceName. + type: object + sensitiveHeaders: + description: |- + SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets. + Use this for credential headers (e.g., API keys, bearer tokens) instead of + embedding secrets in the headers field. + items: + description: |- + SensitiveHeader represents a header whose value is stored in a Kubernetes Secret. + This allows credential headers (e.g., API keys, bearer tokens) to be securely + referenced without embedding secrets inline in the MCPTelemetryConfig resource. properties: - key: - description: Key is the key within the secret - type: string name: - description: Name is the name of the secret + description: Name is the header name (e.g., "Authorization", + "X-API-Key") + minLength: 1 type: string + secretKeyRef: + description: SecretKeyRef is a reference to a Kubernetes + Secret key containing the header value + properties: + key: + description: Key is the key within the secret + type: string + name: + description: Name is the name of the secret + type: string + required: + - key + - name + type: object required: - - key - name + - secretKeyRef type: object - required: - - name - - secretKeyRef - type: object - type: array - serviceName: - description: |- - ServiceName is the service name for telemetry. - When omitted, defaults to the server name (e.g., VirtualMCPServer name). - type: string - serviceVersion: - description: |- - ServiceVersion is the service version for telemetry. - When omitted, defaults to the ToolHive version. - type: string - tracingEnabled: - default: false - description: |- - TracingEnabled controls whether distributed tracing is enabled. - When false, no tracer provider is created even if an endpoint is configured. - type: boolean - useLegacyAttributes: - default: true - description: |- - UseLegacyAttributes controls whether legacy (pre-MCP OTEL semconv) attribute names - are emitted alongside the new standard attribute names. When true, spans include both - old and new attribute names for backward compatibility with existing dashboards. - Currently defaults to true; this will change to false in a future release. - type: boolean + type: array + tracing: + description: Tracing defines OpenTelemetry tracing configuration + properties: + enabled: + default: false + description: Enabled controls whether OTLP tracing is sent + type: boolean + samplingRate: + default: "0.05" + description: SamplingRate is the trace sampling rate (0.0-1.0) + type: string + type: object + useLegacyAttributes: + default: true + description: |- + UseLegacyAttributes controls whether legacy attribute names are emitted alongside + the new MCP OTEL semantic convention names. Defaults to true for backward compatibility. + This will change to false in a future release and eventually be removed. + type: boolean + type: object + x-kubernetes-validations: + - message: a header name cannot appear in both headers and sensitiveHeaders + rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, + !(sh.name in self.headers))' + prometheus: + description: Prometheus defines Prometheus-specific configuration + properties: + enabled: + default: false + description: Enabled controls whether Prometheus metrics endpoint + is exposed + type: boolean + type: object type: object - x-kubernetes-validations: - - message: environmentVariables is a CLI-only field and cannot be set - in MCPTelemetryConfig; use customAttributes for resource attributes - rule: '!has(self.environmentVariables)' - - message: a header name cannot appear in both headers and sensitiveHeaders - rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, - !(sh.name in self.headers))' status: description: MCPTelemetryConfigStatus defines the observed state of MCPTelemetryConfig properties: @@ -257,11 +246,27 @@ spec: for this MCPTelemetryConfig. format: int64 type: integer - referencingServers: - description: ReferencingServers is a list of MCPServer resources that - reference this MCPTelemetryConfig + referencingWorkloads: + description: ReferencingWorkloads lists workloads that reference this + MCPTelemetryConfig items: - type: string + description: WorkloadReference identifies a Kubernetes workload + that references a shared config resource. + properties: + kind: + description: Kind is the resource kind (e.g., "MCPServer") + type: string + name: + description: Name is the resource name + type: string + namespace: + description: Namespace is the resource namespace + type: string + required: + - kind + - name + - namespace + type: object type: array type: object type: object diff --git a/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml b/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml index 9c7ad7f7c9..a072601fc7 100644 --- a/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml +++ b/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml @@ -1,3 +1,8 @@ +# DEPRECATED: Inline telemetry configuration. +# +# The spec.telemetry field is deprecated and will be removed in a future release. +# Prefer spec.telemetryConfigRef with a shared MCPTelemetryConfig resource instead. +# See examples/operator/telemetry-configs/ for the recommended approach. apiVersion: toolhive.stacklok.dev/v1alpha1 kind: MCPServer metadata: @@ -15,6 +20,7 @@ spec: requests: cpu: "50m" memory: "64Mi" + # Deprecated: inline telemetry block. Migrate to telemetryConfigRef. telemetry: openTelemetry: # Point to our OpenTelemetry Collector diff --git a/examples/operator/telemetry-configs/basic-telemetry-config.yaml b/examples/operator/telemetry-configs/basic-telemetry-config.yaml new file mode 100644 index 0000000000..91fefa741e --- /dev/null +++ b/examples/operator/telemetry-configs/basic-telemetry-config.yaml @@ -0,0 +1,24 @@ +# Basic MCPTelemetryConfig with OTLP tracing and metrics. +# +# This is the simplest useful MCPTelemetryConfig: it enables OpenTelemetry +# with OTLP export (tracing + metrics) and Prometheus scraping. +# +# MCPServer resources in the same namespace can reference this config via +# spec.telemetryConfigRef.name, providing a unique serviceName per server. +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPTelemetryConfig +metadata: + name: basic-telemetry + namespace: toolhive-system +spec: + openTelemetry: + enabled: true + endpoint: otel-collector.monitoring.svc.cluster.local:4318 + insecure: true + tracing: + enabled: true + samplingRate: "0.1" + metrics: + enabled: true + prometheus: + enabled: true diff --git a/examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml b/examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml new file mode 100644 index 0000000000..fb9f0699fc --- /dev/null +++ b/examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml @@ -0,0 +1,29 @@ +# MCPServer that references a shared MCPTelemetryConfig. +# +# Instead of defining telemetry inline (deprecated), this MCPServer uses +# spec.telemetryConfigRef to point to a shared MCPTelemetryConfig in the +# same namespace. The serviceName field provides a unique OTel service name +# for this server's traces and metrics. +# +# The referenced MCPTelemetryConfig ("basic-telemetry") must exist in +# toolhive-system before this MCPServer is created. +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPServer +metadata: + name: fetch + namespace: toolhive-system +spec: + image: ghcr.io/stackloklabs/gofetch/server + transport: streamable-http + proxyPort: 8080 + mcpPort: 8080 + resources: + limits: + cpu: "100m" + memory: "128Mi" + requests: + cpu: "50m" + memory: "64Mi" + telemetryConfigRef: + name: basic-telemetry + serviceName: mcp-fetch-server diff --git a/examples/operator/telemetry-configs/production-telemetry-config.yaml b/examples/operator/telemetry-configs/production-telemetry-config.yaml new file mode 100644 index 0000000000..aa19e70730 --- /dev/null +++ b/examples/operator/telemetry-configs/production-telemetry-config.yaml @@ -0,0 +1,36 @@ +# Production-ready MCPTelemetryConfig with all features. +# +# Demonstrates: +# - Resource attributes for environment tagging +# - Secret-backed sensitive headers for authenticated OTLP export +# - Tracing with a conservative sampling rate +# - Prometheus scraping enabled +# +# Prerequisites: +# kubectl create secret generic otel-auth-secret \ +# --namespace=mcp-platform \ +# --from-literal=bearer-token='Bearer ' +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPTelemetryConfig +metadata: + name: production-telemetry + namespace: mcp-platform +spec: + openTelemetry: + enabled: true + endpoint: otel-collector.monitoring.svc.cluster.local:4318 + resourceAttributes: + service.namespace: toolhive + deployment.environment: production + sensitiveHeaders: + - name: Authorization + secretKeyRef: + name: otel-auth-secret + key: bearer-token + tracing: + enabled: true + samplingRate: "0.1" + metrics: + enabled: true + prometheus: + enabled: true From bb63c53834f5ee1a83d2bfd62ab2a6d1e6e0eedb Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:24:46 +0100 Subject: [PATCH 2/7] Consolidate telemetry examples into single multi-document YAML Combine MCPTelemetryConfig and MCPServer-with-ref examples into mcpserver_fetch_otel.yaml as a single apply-able file. Remove the separate telemetry-configs directory. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../mcp-servers/mcpserver_fetch_otel.yaml | 49 +++++++++++-------- .../basic-telemetry-config.yaml | 24 --------- .../mcpserver-with-telemetry-config-ref.yaml | 29 ----------- .../production-telemetry-config.yaml | 36 -------------- 4 files changed, 28 insertions(+), 110 deletions(-) delete mode 100644 examples/operator/telemetry-configs/basic-telemetry-config.yaml delete mode 100644 examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml delete mode 100644 examples/operator/telemetry-configs/production-telemetry-config.yaml diff --git a/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml b/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml index a072601fc7..8b919a70d1 100644 --- a/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml +++ b/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml @@ -1,8 +1,29 @@ -# DEPRECATED: Inline telemetry configuration. +# Shared MCPTelemetryConfig with OTLP tracing, metrics, and Prometheus. # -# The spec.telemetry field is deprecated and will be removed in a future release. -# Prefer spec.telemetryConfigRef with a shared MCPTelemetryConfig resource instead. -# See examples/operator/telemetry-configs/ for the recommended approach. +# Define telemetry configuration once and reference it from multiple MCPServers. +# Each MCPServer provides a unique serviceName for its traces and metrics. +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPTelemetryConfig +metadata: + name: basic-telemetry + namespace: toolhive-system +spec: + openTelemetry: + enabled: true + endpoint: otel-collector.monitoring.svc.cluster.local:4318 + insecure: true + tracing: + enabled: true + samplingRate: "0.1" + metrics: + enabled: true + prometheus: + enabled: true +--- +# MCPServer that references the shared MCPTelemetryConfig above. +# +# The telemetryConfigRef replaces the deprecated inline spec.telemetry field. +# serviceName provides a unique OTel service name for this server's telemetry. apiVersion: toolhive.stacklok.dev/v1alpha1 kind: MCPServer metadata: @@ -20,20 +41,6 @@ spec: requests: cpu: "50m" memory: "64Mi" - # Deprecated: inline telemetry block. Migrate to telemetryConfigRef. - telemetry: - openTelemetry: - # Point to our OpenTelemetry Collector - enabled: true - endpoint: otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4318 - serviceName: mcp-fetch-server - headers: - - "x-test-header=toolhive-demo" - insecure: true # Using HTTP collector endpoint - metrics: - enabled: true - tracing: - enabled: true - samplingRate: "1.0" - prometheus: - enabled: true + telemetryConfigRef: + name: basic-telemetry + serviceName: mcp-fetch-server diff --git a/examples/operator/telemetry-configs/basic-telemetry-config.yaml b/examples/operator/telemetry-configs/basic-telemetry-config.yaml deleted file mode 100644 index 91fefa741e..0000000000 --- a/examples/operator/telemetry-configs/basic-telemetry-config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Basic MCPTelemetryConfig with OTLP tracing and metrics. -# -# This is the simplest useful MCPTelemetryConfig: it enables OpenTelemetry -# with OTLP export (tracing + metrics) and Prometheus scraping. -# -# MCPServer resources in the same namespace can reference this config via -# spec.telemetryConfigRef.name, providing a unique serviceName per server. -apiVersion: toolhive.stacklok.dev/v1alpha1 -kind: MCPTelemetryConfig -metadata: - name: basic-telemetry - namespace: toolhive-system -spec: - openTelemetry: - enabled: true - endpoint: otel-collector.monitoring.svc.cluster.local:4318 - insecure: true - tracing: - enabled: true - samplingRate: "0.1" - metrics: - enabled: true - prometheus: - enabled: true diff --git a/examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml b/examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml deleted file mode 100644 index fb9f0699fc..0000000000 --- a/examples/operator/telemetry-configs/mcpserver-with-telemetry-config-ref.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# MCPServer that references a shared MCPTelemetryConfig. -# -# Instead of defining telemetry inline (deprecated), this MCPServer uses -# spec.telemetryConfigRef to point to a shared MCPTelemetryConfig in the -# same namespace. The serviceName field provides a unique OTel service name -# for this server's traces and metrics. -# -# The referenced MCPTelemetryConfig ("basic-telemetry") must exist in -# toolhive-system before this MCPServer is created. -apiVersion: toolhive.stacklok.dev/v1alpha1 -kind: MCPServer -metadata: - name: fetch - namespace: toolhive-system -spec: - image: ghcr.io/stackloklabs/gofetch/server - transport: streamable-http - proxyPort: 8080 - mcpPort: 8080 - resources: - limits: - cpu: "100m" - memory: "128Mi" - requests: - cpu: "50m" - memory: "64Mi" - telemetryConfigRef: - name: basic-telemetry - serviceName: mcp-fetch-server diff --git a/examples/operator/telemetry-configs/production-telemetry-config.yaml b/examples/operator/telemetry-configs/production-telemetry-config.yaml deleted file mode 100644 index aa19e70730..0000000000 --- a/examples/operator/telemetry-configs/production-telemetry-config.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Production-ready MCPTelemetryConfig with all features. -# -# Demonstrates: -# - Resource attributes for environment tagging -# - Secret-backed sensitive headers for authenticated OTLP export -# - Tracing with a conservative sampling rate -# - Prometheus scraping enabled -# -# Prerequisites: -# kubectl create secret generic otel-auth-secret \ -# --namespace=mcp-platform \ -# --from-literal=bearer-token='Bearer ' -apiVersion: toolhive.stacklok.dev/v1alpha1 -kind: MCPTelemetryConfig -metadata: - name: production-telemetry - namespace: mcp-platform -spec: - openTelemetry: - enabled: true - endpoint: otel-collector.monitoring.svc.cluster.local:4318 - resourceAttributes: - service.namespace: toolhive - deployment.environment: production - sensitiveHeaders: - - name: Authorization - secretKeyRef: - name: otel-auth-secret - key: bearer-token - tracing: - enabled: true - samplingRate: "0.1" - metrics: - enabled: true - prometheus: - enabled: true From bde989d3d069c01abc4d33ca8f4ec6ad84636a5e Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:38:46 +0100 Subject: [PATCH 3/7] Address review: gate OTel on Enabled flag, add endpoint CEL validation Fix two issues from PR review: - NormalizeMCPTelemetryConfig now gates OTel config on Enabled flag, matching ConvertTelemetryConfig behavior. Setting enabled: false correctly disables OTLP even if sub-fields are populated. - Add CEL validation rejecting endpoint when neither tracing nor metrics is enabled, catching the misconfiguration at admission time instead of failing at proxy runner startup. Add test cases covering both scenarios. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../api/v1alpha1/mcptelemetryconfig_types.go | 1 + .../pkg/spectoconfig/telemetry.go | 5 +-- .../pkg/spectoconfig/telemetry_test.go | 32 +++++++++++++++++++ ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 4 +++ ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 4 +++ 5 files changed, 44 insertions(+), 2 deletions(-) diff --git a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go index cbbad5e2e3..9a0fd0d80e 100644 --- a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go @@ -31,6 +31,7 @@ type SensitiveHeader struct { // - Adds ResourceAttributes for shared OTel resource attributes // // +kubebuilder:validation:XValidation:rule="!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))",message="a header name cannot appear in both headers and sensitiveHeaders" +// +kubebuilder:validation:XValidation:rule=”!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)”,message=”endpoint requires at least one of tracing or metrics to be enabled” // //nolint:lll // CEL validation rules exceed line length limit type MCPTelemetryOTelConfig struct { diff --git a/cmd/thv-operator/pkg/spectoconfig/telemetry.go b/cmd/thv-operator/pkg/spectoconfig/telemetry.go index a2249b03b6..f8741d04f5 100644 --- a/cmd/thv-operator/pkg/spectoconfig/telemetry.go +++ b/cmd/thv-operator/pkg/spectoconfig/telemetry.go @@ -116,8 +116,9 @@ func NormalizeMCPTelemetryConfig( config := &telemetry.Config{} - // Map nested OpenTelemetry fields to flat telemetry.Config - if spec.OpenTelemetry != nil { + // Map nested OpenTelemetry fields to flat telemetry.Config. + // Only configure OTLP when Enabled is true, matching ConvertTelemetryConfig behavior. + if spec.OpenTelemetry != nil && spec.OpenTelemetry.Enabled { otel := spec.OpenTelemetry config.Endpoint = otel.Endpoint config.Insecure = otel.Insecure diff --git a/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go b/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go index 7a1d5534b2..9698c3ed37 100644 --- a/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go +++ b/cmd/thv-operator/pkg/spectoconfig/telemetry_test.go @@ -276,6 +276,38 @@ func TestNormalizeMCPTelemetryConfig(t *testing.T) { ServiceName: "fallback", }, }, + { + name: "enabled false skips OTel config entirely", + spec: &v1alpha1.MCPTelemetryConfigSpec{ + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: false, + Endpoint: "https://otel-collector:4317", + Tracing: &v1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + Metrics: &v1alpha1.OpenTelemetryMetricsConfig{Enabled: true}, + }, + }, + serviceNameOverride: "my-service", + defaultServiceName: "fallback", + expected: &telemetry.Config{ + ServiceName: "my-service", + }, + }, + { + name: "endpoint with nil tracing and metrics produces no tracing or metrics", + spec: &v1alpha1.MCPTelemetryConfigSpec{ + OpenTelemetry: &v1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "otel-collector:4317", + // Tracing and Metrics are nil + }, + }, + serviceNameOverride: "", + defaultServiceName: "test-server", + expected: &telemetry.Config{ + Endpoint: "otel-collector:4317", + ServiceName: "test-server", + }, + }, } for _, tt := range tests { diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 0ae849dc83..19b8fcd605 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -163,6 +163,10 @@ spec: - message: a header name cannot appear in both headers and sensitiveHeaders rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))' + - message: ”endpoint requires at least one of tracing or metrics to + be enabled” + rule: ”!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) + && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)” prometheus: description: Prometheus defines Prometheus-specific configuration properties: diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 56f3fe59cb..6db682e222 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -166,6 +166,10 @@ spec: - message: a header name cannot appear in both headers and sensitiveHeaders rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))' + - message: ”endpoint requires at least one of tracing or metrics to + be enabled” + rule: ”!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) + && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)” prometheus: description: Prometheus defines Prometheus-specific configuration properties: From 050f37b2ea5d7101daef9357d47af6f6da4b59cb Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:42:43 +0100 Subject: [PATCH 4/7] Fix CEL rule YAML quoting in CRD manifests Use single-quoted YAML for the endpoint validation CEL rule to match the quoting style of other CEL rules in the chart. Double-quoted YAML caused the rule value to include literal quote characters, failing CRD installation with a CEL syntax error. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml | 7 +++---- .../toolhive.stacklok.dev_mcptelemetryconfigs.yaml | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 19b8fcd605..1de8ba5387 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -163,10 +163,9 @@ spec: - message: a header name cannot appear in both headers and sensitiveHeaders rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))' - - message: ”endpoint requires at least one of tracing or metrics to - be enabled” - rule: ”!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) - && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)” + - message: endpoint requires at least one of tracing or metrics to be enabled + rule: '!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) + && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)' prometheus: description: Prometheus defines Prometheus-specific configuration properties: diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 6db682e222..1f624e9526 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -166,10 +166,9 @@ spec: - message: a header name cannot appear in both headers and sensitiveHeaders rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))' - - message: ”endpoint requires at least one of tracing or metrics to - be enabled” - rule: ”!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) - && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)” + - message: endpoint requires at least one of tracing or metrics to be enabled + rule: '!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) + && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)' prometheus: description: Prometheus defines Prometheus-specific configuration properties: From 7f4ede171735b68152aad258c6b2f68e70a9361a Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:43:49 +0100 Subject: [PATCH 5/7] Fix OTel collector endpoint in telemetry example Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/operator/mcp-servers/mcpserver_fetch_otel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml b/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml index 8b919a70d1..9d71e76e55 100644 --- a/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml +++ b/examples/operator/mcp-servers/mcpserver_fetch_otel.yaml @@ -10,7 +10,7 @@ metadata: spec: openTelemetry: enabled: true - endpoint: otel-collector.monitoring.svc.cluster.local:4318 + endpoint: otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4318 insecure: true tracing: enabled: true From ebac7e94c202ab14a58a1995ab92af8008791bfa Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:05:33 +0100 Subject: [PATCH 6/7] Move endpoint validation to Validate(), fix smart quote corruption The gci linter was replacing ASCII quotes with Unicode smart quotes (U+201C/U+201D) in kubebuilder annotations, breaking controller-gen CRD generation. Additionally, the CEL rule for endpoint validation generated double-quoted YAML that the Helm wrapper mishandled. Move the endpoint-requires-signals validation from a CEL rule to the programmatic Validate() method, which avoids both issues and provides clearer error messages. Add test case for this validation. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../api/v1alpha1/mcptelemetryconfig_types.go | 22 ++++++++++++++++++- .../mcptelemetryconfig_controller_test.go | 13 +++++++++++ ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 6 ++--- ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 6 ++--- 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go index 9a0fd0d80e..f31d57b444 100644 --- a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go @@ -31,7 +31,6 @@ type SensitiveHeader struct { // - Adds ResourceAttributes for shared OTel resource attributes // // +kubebuilder:validation:XValidation:rule="!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))",message="a header name cannot appear in both headers and sensitiveHeaders" -// +kubebuilder:validation:XValidation:rule=”!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)”,message=”endpoint requires at least one of tracing or metrics to be enabled” // //nolint:lll // CEL validation rules exceed line length limit type MCPTelemetryOTelConfig struct { @@ -179,9 +178,30 @@ type MCPTelemetryConfigReference struct { // CEL catches issues at API admission time, but this method also validates // stored objects to catch any that bypassed CEL or were stored before CEL rules were added. func (r *MCPTelemetryConfig) Validate() error { + if err := r.validateEndpointRequiresSignals(); err != nil { + return err + } return r.validateSensitiveHeaders() } +// validateEndpointRequiresSignals rejects an endpoint when neither tracing nor metrics is enabled. +// Without this check the config would pass CRD validation but fail at runtime in telemetry.NewProvider. +func (r *MCPTelemetryConfig) validateEndpointRequiresSignals() error { + if r.Spec.OpenTelemetry == nil { + return nil + } + otel := r.Spec.OpenTelemetry + if otel.Endpoint == "" { + return nil + } + tracingEnabled := otel.Tracing != nil && otel.Tracing.Enabled + metricsEnabled := otel.Metrics != nil && otel.Metrics.Enabled + if !tracingEnabled && !metricsEnabled { + return fmt.Errorf("endpoint requires at least one of tracing or metrics to be enabled") + } + return nil +} + // validateSensitiveHeaders validates sensitive header entries and checks for overlap with plaintext headers. func (r *MCPTelemetryConfig) validateSensitiveHeaders() error { if r.Spec.OpenTelemetry == nil { diff --git a/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go b/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go index 9c37e73899..3c090ec735 100644 --- a/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go +++ b/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go @@ -498,6 +498,19 @@ func TestMCPTelemetryConfig_Validate(t *testing.T) { }, expectError: true, }, + { + name: "invalid endpoint without tracing or metrics", + config: &mcpv1alpha1.MCPTelemetryConfig{ + Spec: mcpv1alpha1.MCPTelemetryConfigSpec{ + OpenTelemetry: &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "otel-collector:4317", + // No Tracing or Metrics configured + }, + }, + }, + expectError: true, + }, { name: "invalid empty secret ref name", config: &mcpv1alpha1.MCPTelemetryConfig{ diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 1de8ba5387..fbf048735b 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -22,6 +22,9 @@ spec: - jsonPath: .spec.openTelemetry.endpoint name: Endpoint type: string + - jsonPath: .status.conditions[?(@.type=='Valid')].status + name: Ready + type: string - jsonPath: .spec.openTelemetry.tracing.enabled name: Tracing type: boolean @@ -163,9 +166,6 @@ spec: - message: a header name cannot appear in both headers and sensitiveHeaders rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))' - - message: endpoint requires at least one of tracing or metrics to be enabled - rule: '!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) - && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)' prometheus: description: Prometheus defines Prometheus-specific configuration properties: diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 1f624e9526..57482db653 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -25,6 +25,9 @@ spec: - jsonPath: .spec.openTelemetry.endpoint name: Endpoint type: string + - jsonPath: .status.conditions[?(@.type=='Valid')].status + name: Ready + type: string - jsonPath: .spec.openTelemetry.tracing.enabled name: Tracing type: boolean @@ -166,9 +169,6 @@ spec: - message: a header name cannot appear in both headers and sensitiveHeaders rule: '!has(self.headers) || !has(self.sensitiveHeaders) || self.sensitiveHeaders.all(sh, !(sh.name in self.headers))' - - message: endpoint requires at least one of tracing or metrics to be enabled - rule: '!has(self.endpoint) || size(self.endpoint) == 0 || (has(self.tracing) - && self.tracing.enabled) || (has(self.metrics) && self.metrics.enabled)' prometheus: description: Prometheus defines Prometheus-specific configuration properties: From 92f1e789d1dc64f3f436b2b5ca74fd7332315e74 Mon Sep 17 00:00:00 2001 From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com> Date: Thu, 2 Apr 2026 18:20:23 +0100 Subject: [PATCH 7/7] Reuse shared WorkloadReference type from MCPOIDCConfig Remove duplicate WorkloadReference definition from mcptelemetryconfig_types.go and use the shared type already defined in mcpoidcconfig_types.go. Drop the Namespace field (cross-namespace refs are not supported) to match the shared type's schema. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../api/v1alpha1/mcptelemetryconfig_types.go | 12 ---- .../mcptelemetryconfig_controller.go | 15 ++--- .../mcptelemetryconfig_controller_test.go | 4 +- ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 18 +++--- ...hive.stacklok.dev_mcptelemetryconfigs.yaml | 18 +++--- docs/operator/crd-api.md | 59 ++++++++++++------- 6 files changed, 65 insertions(+), 61 deletions(-) diff --git a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go index f31d57b444..2383fb7be6 100644 --- a/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcptelemetryconfig_types.go @@ -95,18 +95,6 @@ type MCPTelemetryConfigSpec struct { Prometheus *PrometheusConfig `json:"prometheus,omitempty"` } -// WorkloadReference identifies a Kubernetes workload that references a shared config resource. -type WorkloadReference struct { - // Kind is the resource kind (e.g., "MCPServer") - Kind string `json:"kind"` - - // Namespace is the resource namespace - Namespace string `json:"namespace"` - - // Name is the resource name - Name string `json:"name"` -} - // MCPTelemetryConfigStatus defines the observed state of MCPTelemetryConfig type MCPTelemetryConfigStatus struct { // Conditions represent the latest available observations of the MCPTelemetryConfig's state diff --git a/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go b/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go index a6dba2076d..27ca5da918 100644 --- a/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go +++ b/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go @@ -196,7 +196,7 @@ func (r *MCPTelemetryConfigReconciler) handleDeletion( if len(referencingWorkloads) > 0 { names := make([]string, 0, len(referencingWorkloads)) for _, ref := range referencingWorkloads { - names = append(names, fmt.Sprintf("%s/%s", ref.Namespace, ref.Name)) + names = append(names, fmt.Sprintf("%s/%s", ref.Kind, ref.Name)) } msg := fmt.Sprintf("cannot delete: still referenced by MCPServer(s): %v", names) logger.Info(msg, "telemetryConfig", telemetryConfig.Name) @@ -239,19 +239,12 @@ func (r *MCPTelemetryConfigReconciler) findReferencingWorkloads( if server.Spec.TelemetryConfigRef != nil && server.Spec.TelemetryConfigRef.Name == telemetryConfig.Name { refs = append(refs, mcpv1alpha1.WorkloadReference{ - Kind: "MCPServer", - Namespace: server.Namespace, - Name: server.Name, + Kind: "MCPServer", + Name: server.Name, }) } } slices.SortFunc(refs, func(a, b mcpv1alpha1.WorkloadReference) int { - if a.Namespace != b.Namespace { - if a.Namespace < b.Namespace { - return -1 - } - return 1 - } if a.Name < b.Name { return -1 } @@ -266,6 +259,6 @@ func (r *MCPTelemetryConfigReconciler) findReferencingWorkloads( // workloadRefsEqual compares two WorkloadReference slices for equality. func workloadRefsEqual(a, b []mcpv1alpha1.WorkloadReference) bool { return slices.EqualFunc(a, b, func(x, y mcpv1alpha1.WorkloadReference) bool { - return x.Kind == y.Kind && x.Namespace == y.Namespace && x.Name == y.Name + return x.Kind == y.Kind && x.Name == y.Name }) } diff --git a/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go b/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go index 3c090ec735..2fb5f1cdc9 100644 --- a/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go +++ b/cmd/thv-operator/controllers/mcptelemetryconfig_controller_test.go @@ -704,8 +704,8 @@ func TestMCPTelemetryConfigReconciler_ReferenceTracking(t *testing.T) { // ReferencingWorkloads should list server-a and server-b (sorted), but not server-c assert.Equal(t, []mcpv1alpha1.WorkloadReference{ - {Kind: "MCPServer", Namespace: "default", Name: "server-a"}, - {Kind: "MCPServer", Namespace: "default", Name: "server-b"}, + {Kind: "MCPServer", Name: "server-a"}, + {Kind: "MCPServer", Name: "server-b"}, }, updated.Status.ReferencingWorkloads) } diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index fbf048735b..37b8533df6 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -250,22 +250,24 @@ spec: description: ReferencingWorkloads lists workloads that reference this MCPTelemetryConfig items: - description: WorkloadReference identifies a Kubernetes workload - that references a shared config resource. + description: |- + WorkloadReference identifies a workload that references a shared configuration resource. + Namespace is implicit — cross-namespace references are not supported. properties: kind: - description: Kind is the resource kind (e.g., "MCPServer") + description: Kind is the type of workload resource + enum: + - MCPServer + - VirtualMCPServer + - MCPRemoteProxy type: string name: - description: Name is the resource name - type: string - namespace: - description: Namespace is the resource namespace + description: Name is the name of the workload resource + minLength: 1 type: string required: - kind - name - - namespace type: object type: array type: object diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml index 57482db653..6af87d6be0 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcptelemetryconfigs.yaml @@ -253,22 +253,24 @@ spec: description: ReferencingWorkloads lists workloads that reference this MCPTelemetryConfig items: - description: WorkloadReference identifies a Kubernetes workload - that references a shared config resource. + description: |- + WorkloadReference identifies a workload that references a shared configuration resource. + Namespace is implicit — cross-namespace references are not supported. properties: kind: - description: Kind is the resource kind (e.g., "MCPServer") + description: Kind is the type of workload resource + enum: + - MCPServer + - VirtualMCPServer + - MCPRemoteProxy type: string name: - description: Name is the resource name - type: string - namespace: - description: Namespace is the resource namespace + description: Name is the name of the workload resource + minLength: 1 type: string required: - kind - name - - namespace type: object type: array type: object diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index 030c8d5571..aec3cfec5d 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -698,7 +698,6 @@ Config holds the configuration for OpenTelemetry instrumentation. _Appears in:_ - [vmcp.config.Config](#vmcpconfigconfig) -- [api.v1alpha1.MCPTelemetryConfigSpec](#apiv1alpha1mcptelemetryconfigspec) | Field | Description | Default | Validation | | --- | --- | --- | --- | @@ -2336,10 +2335,8 @@ _Appears in:_ MCPTelemetryConfigSpec defines the desired state of MCPTelemetryConfig. -It embeds telemetry.Config from pkg/telemetry to eliminate the conversion -layer between CRD and application types. The environmentVariables field is -CLI-only and rejected by CEL validation; customAttributes is allowed for -setting shared OTel resource attributes (e.g., deployment.environment). +The spec uses a nested structure with openTelemetry and prometheus sub-objects +for clear separation of concerns. @@ -2348,19 +2345,8 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `endpoint` _string_ | Endpoint is the OTLP endpoint URL | | Optional: \{\}
| -| `serviceName` _string_ | ServiceName is the service name for telemetry.
When omitted, defaults to the server name (e.g., VirtualMCPServer name). | | Optional: \{\}
| -| `serviceVersion` _string_ | ServiceVersion is the service version for telemetry.
When omitted, defaults to the ToolHive version. | | Optional: \{\}
| -| `tracingEnabled` _boolean_ | TracingEnabled controls whether distributed tracing is enabled.
When false, no tracer provider is created even if an endpoint is configured. | false | Optional: \{\}
| -| `metricsEnabled` _boolean_ | MetricsEnabled controls whether OTLP metrics are enabled.
When false, OTLP metrics are not sent even if an endpoint is configured.
This is independent of EnablePrometheusMetricsPath. | false | Optional: \{\}
| -| `samplingRate` _string_ | SamplingRate is the trace sampling rate (0.0-1.0) as a string.
Only used when TracingEnabled is true.
Example: "0.05" for 5% sampling. | 0.05 | Optional: \{\}
| -| `headers` _object (keys:string, values:string)_ | Headers contains authentication headers for the OTLP endpoint. | | Optional: \{\}
| -| `insecure` _boolean_ | Insecure indicates whether to use HTTP instead of HTTPS for the OTLP endpoint. | false | Optional: \{\}
| -| `enablePrometheusMetricsPath` _boolean_ | EnablePrometheusMetricsPath controls whether to expose Prometheus-style /metrics endpoint.
The metrics are served on the main transport port at /metrics.
This is separate from OTLP metrics which are sent to the Endpoint. | false | Optional: \{\}
| -| `environmentVariables` _string array_ | EnvironmentVariables is a list of environment variable names that should be
included in telemetry spans as attributes. Only variables in this list will
be read from the host machine and included in spans for observability.
Example: ["NODE_ENV", "DEPLOYMENT_ENV", "SERVICE_VERSION"] | | Optional: \{\}
| -| `customAttributes` _object (keys:string, values:string)_ | CustomAttributes contains custom resource attributes to be added to all telemetry signals.
These are parsed from CLI flags (--otel-custom-attributes) or environment variables
(OTEL_RESOURCE_ATTRIBUTES) as key=value pairs. | | Optional: \{\}
| -| `useLegacyAttributes` _boolean_ | UseLegacyAttributes controls whether legacy (pre-MCP OTEL semconv) attribute names
are emitted alongside the new standard attribute names. When true, spans include both
old and new attribute names for backward compatibility with existing dashboards.
Currently defaults to true; this will change to false in a future release. | true | Optional: \{\}
| -| `sensitiveHeaders` _[api.v1alpha1.SensitiveHeader](#apiv1alpha1sensitiveheader) array_ | SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets.
Use this for credential headers (e.g., API keys, bearer tokens) instead of
embedding secrets in the headers field. | | Optional: \{\}
| +| `openTelemetry` _[api.v1alpha1.MCPTelemetryOTelConfig](#apiv1alpha1mcptelemetryotelconfig)_ | OpenTelemetry defines OpenTelemetry configuration (OTLP endpoint, tracing, metrics) | | Optional: \{\}
| +| `prometheus` _[api.v1alpha1.PrometheusConfig](#apiv1alpha1prometheusconfig)_ | Prometheus defines Prometheus-specific configuration | | Optional: \{\}
| #### api.v1alpha1.MCPTelemetryConfigStatus @@ -2379,7 +2365,36 @@ _Appears in:_ | `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#condition-v1-meta) array_ | Conditions represent the latest available observations of the MCPTelemetryConfig's state | | Optional: \{\}
| | `observedGeneration` _integer_ | ObservedGeneration is the most recent generation observed for this MCPTelemetryConfig. | | Optional: \{\}
| | `configHash` _string_ | ConfigHash is a hash of the current configuration for change detection | | Optional: \{\}
| -| `referencingServers` _string array_ | ReferencingServers is a list of MCPServer resources that reference this MCPTelemetryConfig | | Optional: \{\}
| +| `referencingWorkloads` _[api.v1alpha1.WorkloadReference](#apiv1alpha1workloadreference) array_ | ReferencingWorkloads lists workloads that reference this MCPTelemetryConfig | | Optional: \{\}
| + + +#### api.v1alpha1.MCPTelemetryOTelConfig + + + +MCPTelemetryOTelConfig defines OpenTelemetry configuration for shared MCPTelemetryConfig resources. +Unlike OpenTelemetryConfig (used by inline MCPServer telemetry), this type: + - Omits ServiceName (per-server field set via MCPTelemetryConfigReference) + - Uses map[string]string for Headers (not []string) + - Adds SensitiveHeaders for Kubernetes Secret-backed credentials + - Adds ResourceAttributes for shared OTel resource attributes + + + +_Appears in:_ +- [api.v1alpha1.MCPTelemetryConfigSpec](#apiv1alpha1mcptelemetryconfigspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enabled` _boolean_ | Enabled controls whether OpenTelemetry is enabled | false | Optional: \{\}
| +| `endpoint` _string_ | Endpoint is the OTLP endpoint URL for tracing and metrics | | Optional: \{\}
| +| `insecure` _boolean_ | Insecure indicates whether to use HTTP instead of HTTPS for the OTLP endpoint | false | Optional: \{\}
| +| `headers` _object (keys:string, values:string)_ | Headers contains authentication headers for the OTLP endpoint.
For secret-backed credentials, use sensitiveHeaders instead. | | Optional: \{\}
| +| `sensitiveHeaders` _[api.v1alpha1.SensitiveHeader](#apiv1alpha1sensitiveheader) array_ | SensitiveHeaders contains headers whose values are stored in Kubernetes Secrets.
Use this for credential headers (e.g., API keys, bearer tokens) instead of
embedding secrets in the headers field. | | Optional: \{\}
| +| `resourceAttributes` _object (keys:string, values:string)_ | ResourceAttributes contains custom resource attributes to be added to all telemetry signals.
These become OTel resource attributes (e.g., deployment.environment, service.namespace).
Note: service.name is intentionally excluded — it is set per-server via
MCPTelemetryConfigReference.ServiceName. | | Optional: \{\}
| +| `metrics` _[api.v1alpha1.OpenTelemetryMetricsConfig](#apiv1alpha1opentelemetrymetricsconfig)_ | Metrics defines OpenTelemetry metrics-specific configuration | | Optional: \{\}
| +| `tracing` _[api.v1alpha1.OpenTelemetryTracingConfig](#apiv1alpha1opentelemetrytracingconfig)_ | Tracing defines OpenTelemetry tracing configuration | | Optional: \{\}
| +| `useLegacyAttributes` _boolean_ | UseLegacyAttributes controls whether legacy attribute names are emitted alongside
the new MCP OTEL semantic convention names. Defaults to true for backward compatibility.
This will change to false in a future release and eventually be removed. | true | Optional: \{\}
| #### api.v1alpha1.MCPToolConfig @@ -2618,6 +2633,7 @@ OpenTelemetryMetricsConfig defines OpenTelemetry metrics configuration _Appears in:_ +- [api.v1alpha1.MCPTelemetryOTelConfig](#apiv1alpha1mcptelemetryotelconfig) - [api.v1alpha1.OpenTelemetryConfig](#apiv1alpha1opentelemetryconfig) | Field | Description | Default | Validation | @@ -2634,6 +2650,7 @@ OpenTelemetryTracingConfig defines OpenTelemetry tracing configuration _Appears in:_ +- [api.v1alpha1.MCPTelemetryOTelConfig](#apiv1alpha1mcptelemetryotelconfig) - [api.v1alpha1.OpenTelemetryConfig](#apiv1alpha1opentelemetryconfig) | Field | Description | Default | Validation | @@ -2724,6 +2741,7 @@ PrometheusConfig defines Prometheus-specific configuration _Appears in:_ +- [api.v1alpha1.MCPTelemetryConfigSpec](#apiv1alpha1mcptelemetryconfigspec) - [api.v1alpha1.TelemetryConfig](#apiv1alpha1telemetryconfig) | Field | Description | Default | Validation | @@ -3000,7 +3018,7 @@ referenced without embedding secrets inline in the MCPTelemetryConfig resource. _Appears in:_ -- [api.v1alpha1.MCPTelemetryConfigSpec](#apiv1alpha1mcptelemetryconfigspec) +- [api.v1alpha1.MCPTelemetryOTelConfig](#apiv1alpha1mcptelemetryotelconfig) | Field | Description | Default | Validation | | --- | --- | --- | --- | @@ -3631,6 +3649,7 @@ Namespace is implicit — cross-namespace references are not supported. _Appears in:_ - [api.v1alpha1.MCPOIDCConfigStatus](#apiv1alpha1mcpoidcconfigstatus) +- [api.v1alpha1.MCPTelemetryConfigStatus](#apiv1alpha1mcptelemetryconfigstatus) | Field | Description | Default | Validation | | --- | --- | --- | --- |