Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CNF-11234: Enable RTE metrics to be scraped securely by Prometheus #1107

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ metadata:
}
]
capabilities: Basic Install
createdAt: "2024-12-18T20:58:55Z"
createdAt: "2024-12-19T08:31:49Z"
olm.skipRange: '>=4.18.0 <4.19.0'
operatorframework.io/cluster-monitoring: "true"
operators.operatorframework.io/builder: operator-sdk-v1.36.1
Expand Down Expand Up @@ -359,6 +359,7 @@ spec:
resources:
- configmaps
- serviceaccounts
- services
verbs:
- '*'
- apiGroups:
Expand Down
1 change: 1 addition & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ rules:
resources:
- configmaps
- serviceaccounts
- services
verbs:
- '*'
- apiGroups:
Expand Down
65 changes: 56 additions & 9 deletions controllers/numaresourcesoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
"github.com/openshift-kni/numaresources-operator/pkg/loglevel"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
apistate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/api"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/merge"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
Expand All @@ -83,15 +86,16 @@ type poolDaemonSet struct {
// NUMAResourcesOperatorReconciler reconciles a NUMAResourcesOperator object
type NUMAResourcesOperatorReconciler struct {
client.Client
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtemanifests.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtemanifests.Manifests
RTEMetricsManifests rtemetricsmanifests.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
}

// TODO: narrow down
Expand All @@ -118,6 +122,7 @@ type NUMAResourcesOperatorReconciler struct {
//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators,verbs=*
//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators/finalizers,verbs=update
//+kubebuilder:rbac:groups="",resources=services,verbs=*

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
Expand Down Expand Up @@ -578,6 +583,30 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx
return nil, fmt.Errorf("failed to apply (%s) %s/%s: %w", objState.Desired.GetObjectKind().GroupVersionKind(), objState.Desired.GetNamespace(), objState.Desired.GetName(), err)
}
}

for _, obj := range r.RTEMetricsManifests.ToObjects() {
// Check if the object already exists
existingObj := obj.DeepCopyObject().(client.Object)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need the cast here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'll remove the casting:
I'll get this error when using r.Client.Get:
cannot use existingObj (variable of type runtime.Object) as client.Object value in argument to r.Client.Get: runtime.Object does not implement client.Object (missing method GetAnnotations)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and why do we use DeepCopyObject() and not DeepCopy() ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ffromani This because the client.Object implements DeepCopyObject() and not DeepCopy()

err := r.Client.Get(ctx, client.ObjectKeyFromObject(obj), existingObj)
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
if apierrors.IsNotFound(err) {
err := controllerutil.SetControllerReference(instance, obj, r.Scheme)
if err != nil {
return nil, fmt.Errorf("failed to set controller reference to %s %s: %w", obj.GetNamespace(), obj.GetName(), err)
}
err = r.Client.Create(ctx, obj)
if err != nil {
return nil, fmt.Errorf("failed to create %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
} else {
rbaturov marked this conversation as resolved.
Show resolved Hide resolved
if err := updateIfNeeded(ctx, existingObj, obj, r.Client); err != nil {
return nil, err
}
}
}

if len(dsPoolPairs) < len(trees) {
klog.Warningf("daemonset and tree size mismatch: expected %d got in daemonsets %d", len(trees), len(dsPoolPairs))
}
Expand Down Expand Up @@ -780,3 +809,21 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr
return nil, fmt.Errorf("unsupported platform")
}
}

func updateIfNeeded(ctx context.Context, existingObj, desiredObj client.Object, cli client.Client) error {
merged, err := merge.MetadataForUpdate(existingObj, desiredObj)
if err != nil {
return fmt.Errorf("could not merge object %s with existing: %w", desiredObj.GetName(), err)
}
isEqual, err := compare.Object(existingObj, merged)
if err != nil {
return fmt.Errorf("could not compare object %s with existing: %w", desiredObj.GetName(), err)
}
if !isEqual {
err = cli.Update(ctx, desiredObj)
if err != nil {
return fmt.Errorf("failed to update %s/%s: %w", desiredObj.GetNamespace(), desiredObj.GetName(), err)
}
}
return nil
}
30 changes: 23 additions & 7 deletions controllers/numaresourcesoperator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
"github.com/openshift-kni/numaresources-operator/internal/api/annotations"
testobjs "github.com/openshift-kni/numaresources-operator/internal/objects"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
Expand All @@ -70,16 +71,20 @@ func NewFakeNUMAResourcesOperatorReconciler(plat platform.Platform, platVersion
if err != nil {
return nil, err
}

rtemetricsmanifests, err := rtemetricsmanifests.GetManifests(testNamespace)
if err != nil {
return nil, err
}
recorder := record.NewFakeRecorder(bufferSize)

return &NUMAResourcesOperatorReconciler{
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rteManifests,
Namespace: testNamespace,
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rteManifests,
RTEMetricsManifests: rtemetricsmanifests,
Namespace: testNamespace,
Images: images.Data{
Builtin: testImageSpec,
},
Expand Down Expand Up @@ -1475,6 +1480,17 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Namespace: testNamespace,
}
Expect(reconciler.Client.Get(context.TODO(), mcp2DSKey, ds)).ToNot(HaveOccurred())

By("Check All RTE metrics components are created")
for _, obj := range reconciler.RTEMetricsManifests.ToObjects() {
objectKey := client.ObjectKeyFromObject(obj)
switch obj.(type) {
case *corev1.Service:
service := &corev1.Service{}
Expect(reconciler.Client.Get(context.TODO(), objectKey, service)).ToNot(HaveOccurred())
default:
}
}
})
When("daemonsets are ready", func() {
var dsDesiredNumberScheduled int32
Expand Down
27 changes: 17 additions & 10 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ import (
intkloglevel "github.com/openshift-kni/numaresources-operator/internal/kloglevel"
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/controlplane"
schedmanifests "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/manifests/sched"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
Expand Down Expand Up @@ -262,18 +263,24 @@ func main() {
klog.ErrorS(err, "unable to render RTE manifests", "controller", "NUMAResourcesOperator")
os.Exit(1)
}
rteMetricsManifests, err := rtemetricsmanifests.GetManifests(namespace)
if err != nil {
klog.ErrorS(err, "unable to load the RTE metrics manifests")
os.Exit(1)
}

if err = (&controllers.NUMAResourcesOperatorReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rteManifestsRendered,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rteManifestsRendered,
RTEMetricsManifests: rteMetricsManifests,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesOperator")
os.Exit(1)
Expand Down
64 changes: 64 additions & 0 deletions pkg/metrics/manifests/manifests.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package manifests

import (
"embed"
"fmt"
"path/filepath"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/scheme"
)

//go:embed yaml
var src embed.FS

func Service(namespace string) (*corev1.Service, error) {
obj, err := loadObject(filepath.Join("yaml", "service.yaml"))
if err != nil {
return nil, err
}

service, ok := obj.(*corev1.Service)
if !ok {
return nil, fmt.Errorf("unexpected type, got %t", obj)
}

if namespace != "" {
service.Namespace = namespace
}
return service, nil
}

func deserializeObjectFromData(data []byte) (runtime.Object, error) {
decode := scheme.Codecs.UniversalDeserializer().Decode
obj, _, err := decode(data, nil, nil)
if err != nil {
return nil, err
}
return obj, nil
}

func loadObject(path string) (runtime.Object, error) {
data, err := src.ReadFile(path)
if err != nil {
return nil, err
}
return deserializeObjectFromData(data)
}
27 changes: 27 additions & 0 deletions pkg/metrics/manifests/manifests_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package manifests

import (
"testing"
)

func TestLoad(t *testing.T) {
if obj, err := Service(""); obj == nil || err != nil {
t.Errorf("Service() failed: err=%v", err)
}
}
52 changes: 52 additions & 0 deletions pkg/metrics/manifests/monitor/monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sched

import (
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests"
)

type Manifests struct {
Service *corev1.Service
}

func (mf Manifests) ToObjects() []client.Object {
return []client.Object{
mf.Service,
}
}

func (mf Manifests) Clone() Manifests {
return Manifests{
Service: mf.Service.DeepCopy(),
}
}

func GetManifests(namespace string) (Manifests, error) {
var err error
mf := Manifests{}

mf.Service, err = manifests.Service(namespace)
if err != nil {
return mf, err
}

return mf, nil
}
Loading
Loading