openshift · rexagod · Dec 25, 2025 · Dec 28, 2025 · Dec 29, 2025 · rexagod
diff --git a/test/extended/prometheus/collection_profiles.go b/test/extended/prometheus/collection_profiles.go
@@ -28,6 +28,7 @@ const (
 	collectionProfileFull         = "full"
 	collectionProfileDefault      = collectionProfileFull
 	collectionProfileMinimal      = "minimal"
+	collectionProfileTelemetry    = "telemetry"
 	collectionProfileNone         = ""
 
 	operatorName              = "cluster-monitoring-operator"
@@ -42,6 +43,7 @@ var (
 	collectionProfilesSupportedList = []string{
 		collectionProfileFull,
 		collectionProfileMinimal,
+		collectionProfileTelemetry,
 	}
 )
 
@@ -63,9 +65,6 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 	tctx := context.Background()
 
 	g.BeforeAll(func() {
-		if !exutil.IsTechPreviewNoUpgrade(tctx, oc.AdminConfigClient()) {
-			g.Skip("skipping, this feature is only supported on TechPreviewNoUpgrade clusters")
-		}
 		var err error
 		r.kclient, err = kubernetes.NewForConfig(oc.AdminConfig())
 		if err != nil {
@@ -84,7 +83,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 				if errors.IsNotFound(err) {
 					g.By("initially, creating a configuration for the operator as it did not exist")
 					operatorConfiguration = nil
-					return r.makeCollectionProfileConfigurationFor(tctx, collectionProfileDefault)
+					return r.makeCollectionProfileConfigurationFor(tctx, collectionProfileDefault, false)
 				}
 
 				return err
@@ -127,7 +126,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 		profile := collectionProfileDefault
 
 		g.BeforeAll(func() {
-			err := r.makeCollectionProfileConfigurationFor(tctx, profile)
+			err := r.makeCollectionProfileConfigurationFor(tctx, profile, false)
 			o.Expect(err).To(o.BeNil())
 			o.Eventually(func() error {
 				enabled, err := r.isProfileEnabled(tctx, profile)
@@ -162,7 +161,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 	g.Context("in a heterogeneous environment,", func() {
 		g.It("should expose information about the applied collection profile using meta-metrics", func() {
 			for _, profile := range collectionProfilesSupportedList {
-				err := r.makeCollectionProfileConfigurationFor(tctx, profile)
+				err := r.makeCollectionProfileConfigurationFor(tctx, profile, false)
 				o.Expect(err).To(o.BeNil())
 
 				o.Eventually(func() error {
@@ -181,7 +180,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 		})
 		g.It("should have at least one implementation for each collection profile", func() {
 			for _, profile := range collectionProfilesSupportedList {
-				err := r.makeCollectionProfileConfigurationFor(tctx, profile)
+				err := r.makeCollectionProfileConfigurationFor(tctx, profile, false)
 				o.Expect(err).To(o.BeNil())
 
 				o.Eventually(func() error {
@@ -198,7 +197,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 			}
 		})
 		g.It("should revert to default collection profile when an empty collection profile value is specified", func() {
-			err := r.makeCollectionProfileConfigurationFor(tctx, collectionProfileNone)
+			err := r.makeCollectionProfileConfigurationFor(tctx, collectionProfileNone, false)
 			o.Expect(err).To(o.BeNil())
 
 			o.Eventually(func() error {
@@ -219,7 +218,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 		profile := collectionProfileMinimal
 
 		g.BeforeAll(func() {
-			err := r.makeCollectionProfileConfigurationFor(tctx, profile)
+			err := r.makeCollectionProfileConfigurationFor(tctx, profile, false)
 			o.Expect(err).To(o.BeNil())
 			o.Eventually(func() error {
 				enabled, err := r.isProfileEnabled(tctx, profile)
@@ -235,78 +234,88 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 		})
 
 		g.It("should hide default metrics", func() {
-			appNameSelector := "app.kubernetes.io/name"
-			appName := "kube-state-metrics"
+			r.compareComponentRelabellingsToCountForProfile(tctx, "app.kubernetes.io/name", "kube-state-metrics", "kube_", profile)
+		})
+	})
+
+	g.Context("in a homogeneous telemetry environment,", func() {
+		profile := collectionProfileTelemetry
 
-			var kubeStateMetricsMonitor *prometheusoperatorv1.ServiceMonitor
+		g.BeforeAll(func() {
+			err := r.makeCollectionProfileConfigurationFor(tctx, profile, true)
+			o.Expect(err).To(o.BeNil())
 			o.Eventually(func() error {
-				monitors, err := r.fetchMonitorsFor(tctx, [2]string{collectionProfileFeatureLabel, profile}, [2]string{appNameSelector, appName})
+				enabled, err := r.isProfileEnabled(tctx, profile)
 				if err != nil {
-					return err
+					return fmt.Errorf("encountered error while checking if profile %q is enabled: %v", profile, err)
 				}
-				if len(monitors.Items) == 0 {
-					return fmt.Errorf("no monitors found with collection profile: %q and %#v=%q", profile, appNameSelector, appName)
+				if !enabled {
+					return fmt.Errorf("collection profile %q is not enabled", profile)
 				}
-				if len(monitors.Items) > 1 {
-					return fmt.Errorf("more than one monitor found with collection profile: %q and %#v=%q", profile, appNameSelector, appName)
+				if enabledErr, err := telemetryIsEnabled(tctx, r.kclient); err != nil {
+					return fmt.Errorf("failed to determine if telemetry is enabled: %v", err)
+				} else if enabledErr != nil {
+					return fmt.Errorf("telemetry is not enabled")
 				}
-				kubeStateMetricsMonitor = monitors.Items[0]
 
 				return nil
 			}, pollTimeout, pollInterval).Should(o.BeNil())
+		})
 
-			var kubeStateMetricsMainMetrics []string
-			kubeStateMetricsMonitorSpec := kubeStateMetricsMonitor.Spec
-			kubeStateMetricsMonitorSpecEndpoints := kubeStateMetricsMonitorSpec.Endpoints
-			if len(kubeStateMetricsMonitorSpecEndpoints) != 0 {
-				kubeStateMetricsMonitorSpecEndpoints0Relabelings := kubeStateMetricsMonitorSpecEndpoints[0].MetricRelabelConfigs
-				if len(kubeStateMetricsMonitorSpecEndpoints0Relabelings) != 0 {
-					for _, relabeling := range kubeStateMetricsMonitorSpecEndpoints0Relabelings {
-						// NOTE: This should accommodate for future changes to the relabeling scope.
-						if relabeling.Action == "keep" &&
-							len(relabeling.SourceLabels) == 1 &&
-							relabeling.SourceLabels[0] == "__name__" {
-							regexpString := relabeling.Regex
-							kubeRegex := regexp.MustCompile(`(?U)(kube_.*)[|,)]`)
-							kubeMetrics := kubeRegex.FindAllString(regexpString, -1)
-							for _, metric := range kubeMetrics {
-								// Golang doesn't support negative lookaheads.
-								if strings.HasPrefix(metric, "kube_state_metrics") {
-									continue
-								}
-								kubeStateMetricsMainMetrics = append(kubeStateMetricsMainMetrics, metric)
-							}
-						}
-					}
-				}
+		g.It("should hide default metrics", func() {
+			r.compareComponentRelabellingsToCountForProfile(tctx, "app.kubernetes.io/name", "kube-state-metrics", "kube_", profile)
+		})
+
+		// this test case ensures that the (a) opted-in (in-cluster components or
+		// otherwise) or (b) full/none/default collection profile monitors
+		// collectively expose the same volume of telemetry metrics as they did
+		// before
+		g.It("should not drop any telemetry metric", func() {
+			telemetryConfigMap, err := r.kclient.CoreV1().ConfigMaps("openshift-monitoring").Get(tctx, "telemetry-config", metav1.GetOptions{})
+			o.Expect(err).To(o.BeNil())
+
+			var telemetryConfig struct {
+				Matches []string `yaml:"matches"`
+			}
+			err = yaml.Unmarshal([]byte(telemetryConfigMap.Data["metrics.yaml"]), &telemetryConfig)
+			o.Expect(err).To(o.BeNil())
+
+			var telemetryMetricsCountQuery string
+			for _, match := range telemetryConfig.Matches {
+				telemetryMetricsCountQuery += fmt.Sprintf("%s or ", match)
 			}
-			o.Expect(len(kubeStateMetricsMainMetrics)).To(o.BeNumerically(">", 0))
+			telemetryMetricsCountQuery = fmt.Sprintf("count(%s)", telemetryMetricsCountQuery[:len(telemetryMetricsCountQuery)-4])
 
 			o.Eventually(func() error {
-				postRelabelingMetric := "scrape_samples_post_metric_relabeling"
-				relabelingMetricQuery := fmt.Sprintf("sum(%s{job=\"%s\",endpoint=\"https-main\",namespace=\"%s\"})", postRelabelingMetric, appName, operatorNamespaceName)
-				queryResponse, err := helper.RunQuery(tctx, r.pclient, relabelingMetricQuery)
+				telemetryMetricsCountQueryResponse, err := helper.RunQuery(tctx, r.pclient, telemetryMetricsCountQuery)
 				if err != nil {
-					return err
+					return fmt.Errorf("failed to run constructed telemetry metrics query: %v", err)
 				}
-				if len(queryResponse.Data.Result) == 0 {
-					return fmt.Errorf("no result found for metric %q", postRelabelingMetric)
+				if len(telemetryMetricsCountQueryResponse.Data.Result) == 0 {
+					return fmt.Errorf("no result found for constructed telemetry metrics query")
 				}
-				wantCount := int(queryResponse.Data.Result[0].Value)
+				gotCount := int(telemetryMetricsCountQueryResponse.Data.Result[0].Value)
 
-				kubeStateMetricsMainMetricsString := strings.Join(kubeStateMetricsMainMetrics, "")
-				kubeStateMetricsMainMetricsCountQuery := fmt.Sprintf("count({__name__=~\"%s\"})", kubeStateMetricsMainMetricsString[:len(kubeStateMetricsMainMetricsString)-1 /* drop the last "|" or ")" */])
-				queryResponse, err = helper.RunQuery(tctx, r.pclient, kubeStateMetricsMainMetricsCountQuery)
+				telemetrySelectedSeriesCountQuery := "cluster:telemetry_selected_series:count"
+				telemetrySelectedSeriesCountQueryResponse, err := helper.RunQuery(tctx, r.pclient, telemetrySelectedSeriesCountQuery)
 				if err != nil {
-					return err
+					return fmt.Errorf("failed to run metric %q: %v", telemetrySelectedSeriesCountQuery, err)
 				}
-				if len(queryResponse.Data.Result) == 0 {
-					return fmt.Errorf("no result found for metric %q", kubeStateMetricsMainMetricsCountQuery)
+				if len(telemetrySelectedSeriesCountQueryResponse.Data.Result) == 0 {
+					return fmt.Errorf("no result found for metric %q", telemetrySelectedSeriesCountQuery)
 				}
-				gotCount := int(queryResponse.Data.Result[0].Value)
-
-				if gotCount != wantCount {
-					return fmt.Errorf("got %v, want %v", gotCount, wantCount)
+				wantCount := int(telemetrySelectedSeriesCountQueryResponse.Data.Result[0].Value)
+				largerCount, smallerCount := gotCount, wantCount
+				if wantCount > gotCount {
+					largerCount, smallerCount = smallerCount, largerCount
+				}
+				seriesDifference := float64(largerCount - smallerCount)
+				permittedVariance := 0.05
+				if largerCount == 0 {
+					return fmt.Errorf("both telemetry metric count and telemetry selected series count are zero")
+				}
+				if seriesDifference/float64(largerCount) > permittedVariance {
+					return fmt.Errorf("compared %q against %q: want %v, got %v", telemetrySelectedSeriesCountQuery, telemetryMetricsCountQuery, wantCount, gotCount)
 				}
 
 				return nil
@@ -315,6 +324,80 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
 	})
 })
 
+func (r runner) compareComponentRelabellingsToCountForProfile(tctx context.Context, appNameSelector, appName, metricSubsystem, profile string) {
+	var serviceMonitor *prometheusoperatorv1.ServiceMonitor
+	o.Eventually(func() error {
+		monitors, err := r.fetchMonitorsFor(tctx, [2]string{collectionProfileFeatureLabel, profile}, [2]string{appNameSelector, appName})
+		if err != nil {
+			return err
+		}
+		if len(monitors.Items) == 0 {
+			return fmt.Errorf("no monitors found with collection profile: %q and %#v=%q", profile, appNameSelector, appName)
+		}
+		if len(monitors.Items) > 1 {
+			return fmt.Errorf("more than one monitor found with collection profile: %q and %#v=%q", profile, appNameSelector, appName)
+		}
+		serviceMonitor = monitors.Items[0]
+
+		return nil
+	}, pollTimeout, pollInterval).Should(o.BeNil())
+
+	var metrics []string
+	spec := serviceMonitor.Spec
+	specEndpoints := spec.Endpoints
+	if len(specEndpoints) != 0 {
+		relabelings := specEndpoints[0].MetricRelabelConfigs
+		if len(relabelings) != 0 {
+			for _, relabeling := range relabelings {
+				if relabeling.Action == "keep" &&
+					len(relabeling.SourceLabels) == 1 &&
+					relabeling.SourceLabels[0] == "__name__" {
+					regexpString := relabeling.Regex
+					subsystemRegex := regexp.MustCompile("(?U)(" + metricSubsystem + ".*)[|,)]")
+					subsystemMetrics := subsystemRegex.FindAllString(regexpString, -1)
+					for _, metric := range subsystemMetrics {
+						if strings.HasPrefix(metric, strings.ReplaceAll(appName, "-", "_")) {
+							continue
+						}
+						metrics = append(metrics, metric)
+					}
+				}
+			}
+		}
+	}
+	o.Expect(len(metrics)).To(o.BeNumerically(">", 0))
+
+	o.Eventually(func() error {
+		postRelabelingMetric := "scrape_samples_post_metric_relabeling"
+		relabelingMetricQuery := fmt.Sprintf("sum(%s{job=\"%s\",endpoint=\"https-main\",namespace=\"%s\"})", postRelabelingMetric, appName, operatorNamespaceName)
+		queryResponse, err := helper.RunQuery(tctx, r.pclient, relabelingMetricQuery)
+		if err != nil {
+			return err
+		}
+		if len(queryResponse.Data.Result) == 0 {
+			return fmt.Errorf("no result found for metric %q", postRelabelingMetric)
+		}
+		wantCount := int(queryResponse.Data.Result[0].Value)
+
+		metricsString := strings.Join(metrics, "")
+		metricsCountQuery := fmt.Sprintf("count({__name__=~\"%s\"})", metricsString[:len(metricsString)-1 /* drop the last "|" or ")" */])
+		queryResponse, err = helper.RunQuery(tctx, r.pclient, metricsCountQuery)
+		if err != nil {
+			return err
+		}
+		if len(queryResponse.Data.Result) == 0 {
+			return fmt.Errorf("no result found for metric %q", metricsCountQuery)
+		}
+		gotCount := int(queryResponse.Data.Result[0].Value)
+
+		if gotCount != wantCount {
+			return fmt.Errorf("got %v, want %v", gotCount, wantCount)
+		}
+
+		return nil
+	}, pollTimeout, pollInterval).Should(o.BeNil())
+}
+
 func (r runner) isProfileEnabled(ctx context.Context, profile string) (bool, error) {
 	vectorExpression := "max(profile:cluster_monitoring_operator_collection_profile:max{profile=\"%s\"}) == 1"
 	queryResponse, err := helper.RunQuery(ctx, r.pclient, fmt.Sprintf(vectorExpression, profile))
@@ -340,7 +423,7 @@ func (r runner) fetchMonitorsFor(ctx context.Context, selectors ...[2]string) (*
 	})
 }
 
-func (r runner) makeCollectionProfileConfigurationFor(ctx context.Context, collectionProfile string) error {
+func (r runner) makeCollectionProfileConfigurationFor(ctx context.Context, collectionProfile string, enableTelemetry bool) error {
 	dataConfigYAMLPrometheusK8s := fmt.Sprintf("collectionProfile: %s", collectionProfile)
 	dataConfigYAMLPrometheusK8sStructured := map[string]interface{}{
 		"collectionProfile": collectionProfile,
@@ -378,6 +461,11 @@ func (r runner) makeCollectionProfileConfigurationFor(ctx context.Context, colle
 			} else {
 				gotDataConfigYAMLMap["prometheusK8s"].(map[string]interface{})["collectionProfile"] = collectionProfile
 			}
+			if enableTelemetry {
+				if _, ok := gotDataConfigYAMLMap["telemeterClient"]; ok {
+					gotDataConfigYAMLMap["telemeterClient"].(map[string]interface{})["enabled"] = true
+				}
+			}
 			gotDataConfigYAMLRaw, err := yaml.Marshal(gotDataConfigYAMLMap)
 			if err != nil {
 				return err