Skip to content

Commit

Permalink
peer-assistant: support the case of ppr created by MHC
Browse files Browse the repository at this point in the history
If a node loses api-server connectivity, it asks its peers to check whether a PPR exists for it in the api-server, but in which namespace should they look?
Also, should they look for a PPR named after the node or after the machine?

This PR addresses this need by using the namespace of the last seen PPR and the owner reference kind (machine/node) of the last seen PPR.
fixes https://bugzilla.redhat.com/show_bug.cgi?id=1972555

Signed-off-by: Nir <[email protected]>
  • Loading branch information
n1r1 committed Jun 17, 2021
1 parent 6e92c3e commit ee61f90
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 13 deletions.
14 changes: 14 additions & 0 deletions controllers/poisonpillremediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,19 @@ var (
Key: "node.kubernetes.io/unschedulable",
Effect: v1.TaintEffectNoSchedule,
}

lastSeenPprNamespace string
isLastSeenPprWasMachine bool
)

// GetLastSeenPprNamespace returns the value of the package-level
// lastSeenPprNamespace variable, which Reconcile sets to the namespace of the
// request it is handling. The result is the empty string until that happens.
//
// NOTE(review): the variable is read without synchronization; confirm whether
// callers can run concurrently with Reconcile.
func GetLastSeenPprNamespace() string {
	namespace := lastSeenPprNamespace
	return namespace
}

// IsLastSeenPprWasMachine returns the value of the package-level
// isLastSeenPprWasMachine flag, which getNodeFromPpr sets to true when it
// finds an owner reference of kind "Machine" on a PPR.
//
// NOTE(review): no visible code resets the flag to false once it has been
// set — confirm this is intended. The read is also unsynchronized.
func IsLastSeenPprWasMachine() bool {
	wasMachine := isLastSeenPprWasMachine
	return wasMachine
}

// PoisonPillRemediationReconciler reconciles a PoisonPillRemediation object
type PoisonPillRemediationReconciler struct {
client.Client
Expand Down Expand Up @@ -89,6 +100,8 @@ func (r *PoisonPillRemediationReconciler) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, err
}

lastSeenPprNamespace = req.Namespace

node, err := r.getNodeFromPpr(ppr)
if err != nil {
if apiErrors.IsNotFound(err) {
Expand Down Expand Up @@ -223,6 +236,7 @@ func (r *PoisonPillRemediationReconciler) getNodeFromPpr(ppr *v1alpha1.PoisonPil

for _, ownerRef := range ppr.OwnerReferences {
if ownerRef.Kind == "Machine" {
isLastSeenPprWasMachine = true
return r.getNodeFromMachine(ownerRef, ppr.Namespace)
}
}
Expand Down
72 changes: 59 additions & 13 deletions pkg/peerassistant/health_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,29 @@ import (
"context"
poisonPillApis "github.com/medik8s/poison-pill/api"
"github.com/medik8s/poison-pill/api/v1alpha1"
"github.com/medik8s/poison-pill/controllers"
corev1 "k8s.io/api/core/v1"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

const (
pprNamespace = "medik8s"
)
const machineAnnotation = "machine.openshift.io/machine" //todo this is openshift specific

var client dynamic.Interface
var pprRes = schema.GroupVersionResource{Group: v1alpha1.GroupVersion.Group,
Version: v1alpha1.GroupVersion.Version,
Resource: "poisonpillremediations"}
var (
client dynamic.Interface
pprRes = schema.GroupVersionResource{Group: v1alpha1.GroupVersion.Group,
Version: v1alpha1.GroupVersion.Version,
Resource: "poisonpillremediations"}
nodeRes = schema.GroupVersionResource{Group: corev1.SchemeGroupVersion.Group,
Version: corev1.SchemeGroupVersion.Version,
Resource: "nodes"}
logger = zap.New().WithName("health-checker")
)

func init() {
// creates the in-cluster config
Expand All @@ -36,18 +43,57 @@ func init() {
}

// isHealthy reports the health of the node named nodeName, based on whether a
// PoisonPillRemediation (PPR) exists for it.
//
// The PPR is looked up in the namespace returned by
// controllers.GetLastSeenPprNamespace. When controllers.IsLastSeenPprWasMachine
// reports true, the lookup goes through isHealthyMachine (PPR named after the
// node's machine); otherwise through isHealthyNode (PPR named after the node).
func isHealthy(nodeName string) poisonPillApis.HealthCheckResponse {
	logger.Info("checking health for", "node", nodeName)

	namespace := controllers.GetLastSeenPprNamespace()
	if controllers.IsLastSeenPprWasMachine() {
		return isHealthyMachine(nodeName, namespace)
	}
	return isHealthyNode(nodeName, namespace)
}

// isHealthyNode handles the case where the PPR is named after the node
// itself: it delegates to isHealthyByPpr, using nodeName as the PPR name and
// namespace as the PPR namespace.
func isHealthyNode(nodeName, namespace string) poisonPillApis.HealthCheckResponse {
	pprName := nodeName
	return isHealthyByPpr(pprName, namespace)
}

// isHealthyByPpr looks up the PoisonPillRemediation named pprName in
// pprNamespace and maps the outcome to a health response:
//
//   - the PPR exists:             Unhealthy
//   - the lookup returns NotFound: Healthy
//   - any other lookup error:      ApiError (the error is logged)
func isHealthyByPpr(pprName string, pprNamespace string) poisonPillApis.HealthCheckResponse {
	_, err := client.Resource(pprRes).Namespace(pprNamespace).Get(context.TODO(), pprName, metav1.GetOptions{})
	switch {
	case err == nil:
		// A PPR exists for this name, so remediation has been requested.
		logger.Info("unhealthy")
		return poisonPillApis.Unhealthy
	case apiErrors.IsNotFound(err):
		logger.Info("healthy")
		return poisonPillApis.Healthy
	default:
		logger.Error(err, "api error")
		return poisonPillApis.ApiError
	}
}

// isHealthyMachine handles the case where the PPR is named after the machine
// backing the node rather than after the node itself.
//
// It fetches the node named nodeName, reads its machineAnnotation value,
// extracts the machine name from that "<namespace>/<name>" key and then
// delegates to isHealthyByPpr with the machine name and the given namespace.
//
// Responses on the failure paths:
//   - the node cannot be fetched:            ApiError
//   - the node has no machine annotation:    Unhealthy
//   - the annotation value cannot be parsed: Unhealthy
func isHealthyMachine(nodeName string, namespace string) poisonPillApis.HealthCheckResponse {
	// The node is fetched with an empty namespace.
	nodes := client.Resource(nodeRes).Namespace("")
	node, err := nodes.Get(context.TODO(), nodeName, metav1.GetOptions{})
	if err != nil {
		logger.Error(err, "api error")
		return poisonPillApis.ApiError
	}

	machineKey, found := node.GetAnnotations()[machineAnnotation]
	if !found {
		logger.Info("node doesn't have machine annotation")
		return poisonPillApis.Unhealthy //todo is this the correct response?
	}

	// Only the name part of the key is used; the PPR namespace comes from the caller.
	_, machineName, splitErr := cache.SplitMetaNamespaceKey(machineKey)
	if splitErr != nil {
		logger.Error(splitErr, "failed to parse machine annotation on the node")
		return poisonPillApis.Unhealthy //todo is this the correct response?
	}

	return isHealthyByPpr(machineName, namespace)
}

0 comments on commit ee61f90

Please sign in to comment.