-
Notifications
You must be signed in to change notification settings - Fork 127
Telemetry enhancement #1677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Telemetry enhancement #1677
Conversation
|
CLA Assistant Lite bot CLA Assistant Lite bot All contributors have signed the COC ✍️ ✅ |
|
I have read the CLA Document and I hereby sign the CLA |
|
I have read the Code of Conduct and I hereby accept the Terms |
|
I think it might be worth adding/updating the docs |
| "sigs.k8s.io/controller-runtime/pkg/reconcile" | ||
| ) | ||
|
|
||
| var _ = Describe("Telemetry Controller", func() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we should have some controller test cases
pkg/splunk/enterprise/telemetry.go
Outdated
| scopedLog.Info("Updated last transmission time in configmap", "newStatus", cm.Data[telStatusKey]) | ||
| } | ||
|
|
||
| func collectResourceTelData(resources corev1.ResourceRequirements, data map[string]string) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we refactor this code to make it much easier to read, or use generics?
an example
// collectDeploymentTelDataRefactored walks a fixed table of CR kinds and hands
// each entry to processCRType, which fills deploymentData in place.
//
// The returned map is keyed by kind and holds the CRs that processCRType
// appended to it; entries whose checkTelApp flag is false never contribute.
func collectDeploymentTelDataRefactored(ctx context.Context, client splcommon.ControllerClient, deploymentData map[string]interface{}) map[string][]splcommon.MetaObject {
	scopedLog := log.FromContext(ctx).WithName("collectDeploymentTelData")
	scopedLog.Info("Start collecting deployment telemetry data")

	// One row per CR kind: how to list it, and whether its telemetry-app
	// status should be considered at all.
	crKinds := []crListHandler{
		{kind: "Standalone", listFunc: listStandalones, checkTelApp: true},
		{kind: "LicenseManager", listFunc: listLicenseManagers, checkTelApp: true},
		{kind: "LicenseMaster", listFunc: listLicenseMasters, checkTelApp: true},
		{kind: "SearchHeadCluster", listFunc: listSearchHeadClusters, checkTelApp: true},
		{kind: "IndexerCluster", listFunc: listIndexerClusters, checkTelApp: false},
		{kind: "ClusterManager", listFunc: listClusterManagers, checkTelApp: true},
		{kind: "ClusterMaster", listFunc: listClusterMasters, checkTelApp: true},
		{kind: "MonitoringConsole", listFunc: listMonitoringConsoles, checkTelApp: false},
	}

	telAppCRs := make(map[string][]splcommon.MetaObject)
	for idx := range crKinds {
		// Every kind goes through the same processing routine.
		processCRType(ctx, client, crKinds[idx], deploymentData, telAppCRs, scopedLog)
	}
	return telAppCRs
}
// processCRType is the common processing logic for all CR types.
//
// It calls handler.listFunc, and for every returned item stores a fresh
// map[string]string under deploymentData[handler.kind][item.name], filled by
// collectResourceTelData when item.resources holds a
// corev1.ResourceRequirements value. When handler.checkTelApp is set, items
// with hasTelApp are appended to crWithTelAppList[handler.kind].
//
// Nothing is written to deploymentData when listing fails or returns no items.
//
// scopedLog stays typed as interface{} so the signature is unchanged. If the
// value passed in provides Info and Error methods with the shapes declared
// below, list failures and per-item progress are logged through it; otherwise
// the function runs without logging.
func processCRType(
	ctx context.Context,
	client splcommon.ControllerClient,
	handler crListHandler,
	deploymentData map[string]interface{},
	crWithTelAppList map[string][]splcommon.MetaObject,
	scopedLog interface{}, // Using interface{} to avoid import issues, should be logr.Logger
) {
	// Minimal logging surface needed here; declared locally so no logging
	// package has to be imported just for the parameter type.
	type telLogger interface {
		Info(msg string, keysAndValues ...interface{})
		Error(err error, msg string, keysAndValues ...interface{})
	}
	logger, canLog := scopedLog.(telLogger)

	items, err := handler.listFunc(ctx, client)
	if err != nil {
		// Best-effort collection: report the failure and skip this kind
		// instead of dropping the error silently.
		if canLog {
			logger.Error(err, "Failed to list objects", "kind", handler.kind)
		}
		return
	}
	if len(items) == 0 {
		return
	}

	// Create per-kind data map.
	// NOTE(review): entries are keyed by item.name only, so two CRs of the
	// same kind with the same name in different namespaces would overwrite
	// each other — confirm whether that is intended.
	perKindData := make(map[string]interface{}, len(items))
	deploymentData[handler.kind] = perKindData

	for _, item := range items {
		// handler.kind is logged rather than item.kind because it is the
		// literal the caller supplied and is therefore always populated.
		if canLog {
			logger.Info("Collecting data", "kind", handler.kind, "name", item.name, "namespace", item.namespace)
		}

		crResourceData := make(map[string]string)
		perKindData[item.name] = crResourceData

		// Collect resource telemetry data; the entry stays empty when
		// item.resources is not a corev1.ResourceRequirements value.
		if resources, ok := item.resources.(corev1.ResourceRequirements); ok {
			collectResourceTelData(resources, crResourceData)
		}

		// Add to telemetry app list if applicable.
		switch {
		case !handler.checkTelApp:
			// This kind does not take part in telemetry-app tracking.
		case item.hasTelApp:
			crWithTelAppList[handler.kind] = append(crWithTelAppList[handler.kind], item.cr)
		case canLog:
			logger.Info("Telemetry app is not installed for this CR", "kind", handler.kind, "name", item.name)
		}
	}
}
// List functions for each CR type - these extract the common pattern

// listStandalones lists Standalone CRs through client (no list options are
// passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list, and
// hasTelApp copies Status.TelAppInstalled. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listStandalones(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	standalones := enterpriseApi.StandaloneList{}
	if err := client.List(ctx, &standalones); err != nil {
		return nil, err
	}

	out := make([]crItem, len(standalones.Items))
	for idx := range standalones.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &standalones.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: obj.Status.TelAppInstalled,
			cr:        obj,
		}
	}
	return out, nil
}
// listLicenseManagers lists LicenseManager CRs through client (no list options
// are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list, and
// hasTelApp copies Status.TelAppInstalled. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listLicenseManagers(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	managers := enterpriseApi.LicenseManagerList{}
	if err := client.List(ctx, &managers); err != nil {
		return nil, err
	}

	out := make([]crItem, len(managers.Items))
	for idx := range managers.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &managers.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: obj.Status.TelAppInstalled,
			cr:        obj,
		}
	}
	return out, nil
}
// listLicenseMasters lists LicenseMaster CRs (enterpriseApiV3) through client
// (no list options are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list, and
// hasTelApp copies Status.TelAppInstalled. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listLicenseMasters(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	masters := enterpriseApiV3.LicenseMasterList{}
	if err := client.List(ctx, &masters); err != nil {
		return nil, err
	}

	out := make([]crItem, len(masters.Items))
	for idx := range masters.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &masters.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: obj.Status.TelAppInstalled,
			cr:        obj,
		}
	}
	return out, nil
}
// listSearchHeadClusters lists SearchHeadCluster CRs through client (no list
// options are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list, and
// hasTelApp copies Status.TelAppInstalled. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listSearchHeadClusters(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	clusters := enterpriseApi.SearchHeadClusterList{}
	if err := client.List(ctx, &clusters); err != nil {
		return nil, err
	}

	out := make([]crItem, len(clusters.Items))
	for idx := range clusters.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &clusters.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: obj.Status.TelAppInstalled,
			cr:        obj,
		}
	}
	return out, nil
}
// listIndexerClusters lists IndexerCluster CRs through client (no list options
// are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list.
// hasTelApp is always false for this kind. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listIndexerClusters(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	clusters := enterpriseApi.IndexerClusterList{}
	if err := client.List(ctx, &clusters); err != nil {
		return nil, err
	}

	out := make([]crItem, len(clusters.Items))
	for idx := range clusters.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &clusters.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: false, // IndexerClusters don't track TelAppInstalled
			cr:        obj,
		}
	}
	return out, nil
}
// listClusterManagers lists ClusterManager CRs through client (no list options
// are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list, and
// hasTelApp copies Status.TelAppInstalled. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listClusterManagers(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	managers := enterpriseApi.ClusterManagerList{}
	if err := client.List(ctx, &managers); err != nil {
		return nil, err
	}

	out := make([]crItem, len(managers.Items))
	for idx := range managers.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &managers.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: obj.Status.TelAppInstalled,
			cr:        obj,
		}
	}
	return out, nil
}
// listClusterMasters lists ClusterMaster CRs (enterpriseApiV3) through client
// (no list options are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list, and
// hasTelApp copies Status.TelAppInstalled. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listClusterMasters(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	masters := enterpriseApiV3.ClusterMasterList{}
	if err := client.List(ctx, &masters); err != nil {
		return nil, err
	}

	out := make([]crItem, len(masters.Items))
	for idx := range masters.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &masters.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: obj.Status.TelAppInstalled,
			cr:        obj,
		}
	}
	return out, nil
}
// listMonitoringConsoles lists MonitoringConsole CRs through client (no list
// options are passed) and converts each one into a crItem.
//
// Each crItem's cr field points at the element inside the fetched list.
// hasTelApp is always false for this kind. On a list error it returns
// (nil, err); on success the slice is non-nil, even when empty.
func listMonitoringConsoles(ctx context.Context, client splcommon.ControllerClient) ([]crItem, error) {
	consoles := enterpriseApi.MonitoringConsoleList{}
	if err := client.List(ctx, &consoles); err != nil {
		return nil, err
	}

	out := make([]crItem, len(consoles.Items))
	for idx := range consoles.Items {
		// Take the address of the slice element, not of a loop copy.
		obj := &consoles.Items[idx]
		out[idx] = crItem{
			name:      obj.GetName(),
			namespace: obj.GetNamespace(),
			kind:      obj.Kind,
			resources: obj.Spec.CommonSplunkSpec.Resources,
			hasTelApp: false, // MonitoringConsoles don't track TelAppInstalled
			cr:        obj,
		}
	}
	return out, nil
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code has 47% test coverage; let's try to raise it to 90%.
Description.
This PR implements the SOK telemetry enhancement. ERD:
https://cisco-my.sharepoint.com/:w:/p/mqiu/IQBoVUuEEY1SR4rDjbja0iPuAeN5dxFG-K-ZPpvO6RoWJp0?e=n5R1Ow
What does this PR have in it?.
Periodically collect (once per day) and send SOK telemetry which includes:
a. SOK version.
b. CPU/Memory settings (limit and request) of containers including standalone, searchheadcluster, indexercluster,
clustermaster, clustermanager, licensemaster and licensemanager.
c. LicenseInfo (Splunk license ID and license type).
Key Changes.
Highlight the updates in specific files
Testing and Verification.
Tested on s1, c3 and m4.
How did you test these changes? What automated tests are added?.
Added telemetry verification to existing s1, c3 and m4 tests.
Related Issues
Jira tickets, GitHub issues, Support tickets...
https://splunk.atlassian.net/browse/CSPL-4371.
PR Checklist