Skip to content

Commit

Permalink
add time thresholds to job pod and report CR loops (#9)
Browse files Browse the repository at this point in the history
Added time thresholds to loops to avoid infinite loops:

- Loop waiting for job pod to be complete.
- Loop checking creation of report CR.
  • Loading branch information
shirmoran authored Jan 15, 2024
1 parent 15283c4 commit b60ec4b
Showing 1 changed file with 28 additions and 5 deletions.
33 changes: 28 additions & 5 deletions controllers/cnfcertificationsuiterun_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ var (
cnfRunPodID int
)

const multiplier = 5
// Timing settings used by the polling loops in this controller.
const (
	// defaultCnfCertSuiteTimeout is the wait limit used when no valid
	// timeout can be parsed, and the limit for the report-creation wait.
	defaultCnfCertSuiteTimeout = 60 * time.Minute
	// checkInterval is the pause between two consecutive polls.
	checkInterval = time.Second * 5
)

// +kubebuilder:rbac:groups="*",resources="*",verbs="*"
// +kubebuilder:rbac:urls="*",verbs="*"
Expand All @@ -80,13 +83,27 @@ func (r *CnfCertificationSuiteRunReconciler) updateJobPhaseStatus(cnfrun *cnfcer
}
}

func (r *CnfCertificationSuiteRunReconciler) waitForCnfCertJobPodToComplete(ctx context.Context, namespace string, cnfCertJobPod *corev1.Pod) {
// getJobRunTimeThreshold parses timeoutStr into the maximum duration to wait
// for a job run.
//
// timeoutStr must use the syntax accepted by time.ParseDuration (for example
// "30m" or "1h30m"). If it cannot be parsed, or if it parses to a zero or
// negative duration, defaultCnfCertSuiteTimeout is returned instead and the
// reason is logged.
func (r *CnfCertificationSuiteRunReconciler) getJobRunTimeThreshold(timeoutStr string) time.Duration {
	jobRunTimeThreshold, err := time.ParseDuration(timeoutStr)
	if err != nil {
		logrus.Warnf("Couldn't extract job run timeout from %q (%v), setting default timeout of %s.",
			timeoutStr, err, defaultCnfCertSuiteTimeout)
		return defaultCnfCertSuiteTimeout
	}
	// A non-positive threshold would make any "elapsed > threshold" check
	// succeed immediately, so the caller would give up before polling once.
	if jobRunTimeThreshold <= 0 {
		logrus.Warnf("Job run timeout %q is not positive, setting default timeout of %s.",
			timeoutStr, defaultCnfCertSuiteTimeout)
		return defaultCnfCertSuiteTimeout
	}
	return jobRunTimeThreshold
}

func (r *CnfCertificationSuiteRunReconciler) waitForCnfCertJobPodToComplete(ctx context.Context, namespace string, cnfCertJobPod *corev1.Pod, jobRunTimeThreshold time.Duration) {
cnfCertJobNamespacedName := types.NamespacedName{
Namespace: namespace,
Name: cnfCertJobPod.Name,
}

startTime := time.Now()
for {
if time.Since(startTime) > jobRunTimeThreshold {
logrus.Error("Time threshold reached, job did not complete")
break
}
switch cnfCertJobPod.Status.Phase {
case corev1.PodSucceeded:
logrus.Info("Cnf job pod has completed successfully.")
Expand All @@ -96,7 +113,7 @@ func (r *CnfCertificationSuiteRunReconciler) waitForCnfCertJobPodToComplete(ctx
return
default:
logrus.Info("Cnf job pod is running. Current status: ", cnfCertJobPod.Status.Phase)
time.Sleep(multiplier * time.Second)
time.Sleep(checkInterval)
}
err := r.Get(ctx, cnfCertJobNamespacedName, cnfCertJobPod)
if err != nil {
Expand All @@ -116,7 +133,8 @@ func (r *CnfCertificationSuiteRunReconciler) getCertSuiteContainerExitStatus(cnf
}

func (r *CnfCertificationSuiteRunReconciler) handleEndOfCnfCertSuiteRun(ctx context.Context, namespace string, cnfCertJobPod *corev1.Pod, cnfrun *cnfcertificationsv1alpha1.CnfCertificationSuiteRun) {
r.waitForCnfCertJobPodToComplete(ctx, namespace, cnfCertJobPod)
jobRunTimeThreshold := r.getJobRunTimeThreshold(cnfrun.Spec.TimeOut)
r.waitForCnfCertJobPodToComplete(ctx, namespace, cnfCertJobPod, jobRunTimeThreshold)

// cnf-cert-job has terminated - checking exit status of cert suite
certSuiteExitStatus := r.getCertSuiteContainerExitStatus(cnfCertJobPod)
Expand All @@ -136,10 +154,15 @@ func (r *CnfCertificationSuiteRunReconciler) waitForReportToBeCreated(ctx contex
Namespace: namespace,
Name: reportName,
}
startTime := time.Now()
var cnfreport cnfcertificationsv1alpha1.CnfCertificationSuiteReport
for err := r.Get(ctx, reportNamespacedName, &cnfreport); err != nil; {
if time.Since(startTime) > defaultCnfCertSuiteTimeout {
logrus.Error("Time threshold reached, report is not found")
break
}
logrus.Infof("Waiting for %s to be created...", reportNamespacedName.Name)
time.Sleep(multiplier * time.Second)
time.Sleep(checkInterval)
err = r.Get(ctx, reportNamespacedName, &cnfreport)
}
logrus.Infof("%s has been created", reportNamespacedName.Name)
Expand Down

0 comments on commit b60ec4b

Please sign in to comment.