Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof of concept implementation for KafkaRoller 2.0 #11020

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ public class ClusterOperatorConfig {
*/
public static final ConfigParameter<Long> OPERATION_TIMEOUT_MS = new ConfigParameter<>("STRIMZI_OPERATION_TIMEOUT_MS", LONG, "300000", CONFIG_VALUES);

/**
* The maximum number of broker nodes that can be restarted at once
*/
public static final ConfigParameter<Integer> MAX_RESTART_BATCH_SIZE = new ConfigParameter<>("STRIMZI_MAX_RESTART_BATCH_SIZE", INTEGER, "1", CONFIG_VALUES);

/**
* Timeout used to wait for a Kafka Connect build to finish
*/
Expand Down Expand Up @@ -465,6 +470,13 @@ public long getOperationTimeoutMs() {
return get(OPERATION_TIMEOUT_MS);
}

/**
* @return how many broker nodes can be restarted in parallel
*/
public int getMaxRestartBatchSize() {
    // Look up the configured value first, then let the return unbox it to a primitive int
    final Integer configuredBatchSize = get(MAX_RESTART_BATCH_SIZE);
    return configuredBatchSize;
}

/**
* @return How many milliseconds should we wait for Kafka Connect build to complete
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,12 @@ public int hashCode() {
public String toString() {
return reasons.keySet().toString();
}

/**
* @param reason The reason to test.
* @return true if these reasons are just the single given reason.
*/
public boolean isSingletonOf(RestartReason reason) {
    // Anything other than exactly one recorded reason can never be a singleton
    if (reasons.size() != 1) {
        return false;
    }
    // Exactly one reason is recorded, so it only remains to check it is the given one
    return reasons.containsKey(reason);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import io.strimzi.operator.cluster.model.RestartReasons;
import io.strimzi.operator.cluster.operator.resource.ConcurrentDeletionException;
import io.strimzi.operator.cluster.operator.resource.KafkaAgentClientProvider;
import io.strimzi.operator.cluster.operator.resource.KafkaRoller;
//import io.strimzi.operator.cluster.operator.resource.KafkaRoller;
import io.strimzi.operator.cluster.operator.resource.ResourceOperatorSupplier;
import io.strimzi.operator.cluster.operator.resource.events.KubernetesRestartEventPublisher;
import io.strimzi.operator.cluster.operator.resource.kubernetes.ClusterRoleBindingOperator;
Expand All @@ -65,9 +65,10 @@
import io.strimzi.operator.cluster.operator.resource.kubernetes.ServiceOperator;
import io.strimzi.operator.cluster.operator.resource.kubernetes.StorageClassOperator;
import io.strimzi.operator.cluster.operator.resource.kubernetes.StrimziPodSetOperator;
import io.strimzi.operator.cluster.operator.resource.rolling.RackRolling;
import io.strimzi.operator.common.AdminClientProvider;
import io.strimzi.operator.common.Annotations;
import io.strimzi.operator.common.BackOff;
//import io.strimzi.operator.common.e;
import io.strimzi.operator.common.Reconciliation;
import io.strimzi.operator.common.ReconciliationLogger;
import io.strimzi.operator.common.Util;
Expand Down Expand Up @@ -112,6 +113,7 @@ public class KafkaReconciler {

// Various settings
private final long operationTimeoutMs;
private final int maxRestartBatchSize;
private final boolean isNetworkPolicyGeneration;
private final boolean isPodDisruptionBudgetGeneration;
private final boolean isKafkaNodePoolsEnabled;
Expand Down Expand Up @@ -195,6 +197,7 @@ public KafkaReconciler(
this.reconciliation = reconciliation;
this.vertx = vertx;
this.operationTimeoutMs = config.getOperationTimeoutMs();
this.maxRestartBatchSize = config.getMaxRestartBatchSize();
this.kafkaNodePoolCrs = nodePools;
this.kafka = kafka;

Expand Down Expand Up @@ -466,23 +469,47 @@ protected Future<Void> maybeRollKafka(
Map<Integer, Map<String, String>> kafkaAdvertisedPorts,
boolean allowReconfiguration
) {
return new KafkaRoller(
reconciliation,
vertx,
podOperator,
1_000,
operationTimeoutMs,
() -> new BackOff(250, 2, 10),
nodes,
this.coTlsPemIdentity,
adminClientProvider,
kafkaAgentClientProvider,
brokerId -> kafka.generatePerBrokerConfiguration(brokerId, kafkaAdvertisedHostnames, kafkaAdvertisedPorts),
logging,
kafka.getKafkaVersion(),
allowReconfiguration,
eventsPublisher
).rollingRestart(podNeedsRestart);
Function<Integer, String> kafkaConfigProvider = nodeId -> kafka.generatePerBrokerConfiguration(nodeId, kafkaAdvertisedHostnames, kafkaAdvertisedPorts);
//TODO: Change this logic to run the new roller if the feature gate for it is enabled (also add feature gate).

var rr = RackRolling.rollingRestart(
podOperator,
nodes,
reconciliation,
// Remap the function from pod to RestartReasons to nodeId to RestartReasons
nodeId -> podNeedsRestart.apply(podOperator.get(reconciliation.namespace(), nodes.stream().filter(nodeRef -> nodeRef.nodeId() == nodeId).collect(Collectors.toList()).get(0).podName())),
this.coTlsPemIdentity,
adminClientProvider,
kafkaAgentClientProvider,
kafkaConfigProvider,
allowReconfiguration,
kafka.getKafkaVersion(),
logging,
operationTimeoutMs,
maxRestartBatchSize,
3,
3,
10,
eventsPublisher);

return rr.executeRollingAsync(vertx);
// return new KafkaRoller(
// reconciliation,
// vertx,
// podOperator,
// 1_000,
// operationTimeoutMs,
// () -> new BackOff(250, 2, 10),
// nodes,
// this.coTlsPemIdentity,
// adminClientProvider,
// kafkaAgentClientProvider,
// kafkaConfigProvider,
// logging,
// kafka.getKafkaVersion(),
// allowReconfiguration,
// eventsPublisher
// ).rollingRestart(podNeedsRestart);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
/**
* Java representation of the JSON response from the /v1/broker-state endpoint of the KafkaAgent
*/
class BrokerState {
public class BrokerState {
private static final int BROKER_RECOVERY_STATE = 2;

private final int code;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ public class KafkaBrokerConfigurationDiff extends AbstractJsonDiff {
* @param kafkaVersion Kafka version
* @param brokerNodeRef Broker node reference
*/
protected KafkaBrokerConfigurationDiff(Reconciliation reconciliation, Config brokerConfigs, String desired, KafkaVersion kafkaVersion, NodeRef brokerNodeRef) {
public KafkaBrokerConfigurationDiff(Reconciliation reconciliation,
Config brokerConfigs,
String desired,
KafkaVersion kafkaVersion,
NodeRef brokerNodeRef) {
this.reconciliation = reconciliation;
this.configModel = KafkaConfiguration.readConfigModel(kafkaVersion);
this.brokerConfigDiff = diff(brokerNodeRef, desired, brokerConfigs, configModel);
Expand All @@ -83,7 +87,7 @@ protected KafkaBrokerConfigurationDiff(Reconciliation reconciliation, Config bro
/**
* @return Returns true if the configuration can be updated dynamically
*/
protected boolean canBeUpdatedDynamically() {
public boolean canBeUpdatedDynamically() {
boolean result = true;
for (AlterConfigOp entry : brokerConfigDiff) {
if (isEntryReadOnly(entry.configEntry())) {
Expand All @@ -107,7 +111,7 @@ private boolean isEntryReadOnly(ConfigEntry entry) {
* Returns configuration difference
* @return Collection of AlterConfigOp containing difference between current and desired configuration
*/
protected Collection<AlterConfigOp> getConfigDiff() {
public Collection<AlterConfigOp> getConfigDiff() {
return brokerConfigDiff;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public class KafkaBrokerLoggingConfigurationDiff extends AbstractJsonDiff {
* @param brokerConfigs Current broker configuration from Kafka Admin API
* @param desired Desired logging configuration
*/
protected KafkaBrokerLoggingConfigurationDiff(Reconciliation reconciliation, Config brokerConfigs, String desired) {
public KafkaBrokerLoggingConfigurationDiff(Reconciliation reconciliation, Config brokerConfigs, String desired) {
this.reconciliation = reconciliation;
this.diff = diff(desired, brokerConfigs);
}
Expand All @@ -47,7 +47,7 @@ protected KafkaBrokerLoggingConfigurationDiff(Reconciliation reconciliation, Con
* Returns logging difference
* @return Collection of AlterConfigOp containing difference between current and desired logging configuration
*/
protected Collection<AlterConfigOp> getLoggingDiff() {
public Collection<AlterConfigOp> getLoggingDiff() {
return diff;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public Future<Void> restart(Reconciliation reconciliation, Pod pod, long timeout
String namespace = pod.getMetadata().getNamespace();
String podName = pod.getMetadata().getName();
Promise<Void> deleteFinished = Promise.promise();
LOGGER.infoCr(reconciliation, "Rolling pod {}", podName);
LOGGER.infoCr(reconciliation, "Deleting pod {}", podName);

// Determine generation of deleted pod
String deleted = getPodUid(pod);
Expand All @@ -66,7 +66,7 @@ public Future<Void> restart(Reconciliation reconciliation, Pod pod, long timeout
boolean done = !deleted.equals(newUid);

if (done) {
LOGGER.debugCr(reconciliation, "Rolling pod {} finished", podName);
LOGGER.debugCr(reconciliation, "Deleting pod {} finished", podName);
}

return done;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Strimzi authors.
* License: Apache License 2.0 (see the file LICENSE or http://apache.org/licenses/LICENSE-2.0.html).
*/
package io.strimzi.operator.cluster.operator.resource.rolling;

import io.strimzi.operator.cluster.model.NodeRef;

/**
* An abstraction over a KafkaAgent client.
*/
interface AgentClient {

/**
* Gets the broker state of the given node.
*
* @param nodeRef The node whose broker state is requested
* @return The broker state, according to the Kafka Agent
*/
BrokerState getBrokerState(NodeRef nodeRef);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright Strimzi authors.
* License: Apache License 2.0 (see the file LICENSE or http://apache.org/licenses/LICENSE-2.0.html).
*/
package io.strimzi.operator.cluster.operator.resource.rolling;

import io.strimzi.operator.cluster.model.NodeRef;
import io.strimzi.operator.cluster.operator.resource.KafkaAgentClient;

/**
 * An {@link AgentClient} which delegates to a {@link KafkaAgentClient} and converts its result
 * into this package's {@link BrokerState}.
 */
class AgentClientImpl implements AgentClient {
    private final KafkaAgentClient kafkaAgentClient;

    /**
     * @param kafkaAgentClient The Kafka Agent client that broker state requests are delegated to
     */
    AgentClientImpl(KafkaAgentClient kafkaAgentClient) {
        this.kafkaAgentClient = kafkaAgentClient;
    }

    /**
     * Asks the Kafka Agent for the state of the node (addressed by its pod name) and copies the
     * state code and the remaining log recovery counters into the returned {@link BrokerState}.
     *
     * @param nodeRef The node whose broker state is requested
     * @return The broker state reported by the Kafka Agent
     */
    @Override
    public BrokerState getBrokerState(NodeRef nodeRef) {
        var agentState = kafkaAgentClient.getBrokerState(nodeRef.podName());

        // The agent reports the state as a numeric code; it is narrowed to a byte for the lookup
        byte stateCode = (byte) agentState.code();
        BrokerState state = BrokerState.fromValue(stateCode);

        // NOTE(review): the setters mutate the instance returned by fromValue - confirm that
        // instance is not shared between callers
        state.setRemainingSegmentsToRecover(agentState.remainingSegmentsToRecover());
        state.setRemainingLogsToRecover(agentState.remainingLogsToRecover());
        return state;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright Strimzi authors.
* License: Apache License 2.0 (see the file LICENSE or http://apache.org/licenses/LICENSE-2.0.html).
*/
package io.strimzi.operator.cluster.operator.resource.rolling;

import io.strimzi.operator.common.UncheckedInterruptedException;

import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;
import java.util.function.Supplier;

/**
* Timing utility for polling loops which allows to set an alarm (in terms of a duration from "now") and
* subsequently sleep the executing thread. If the alarm duration is exceeded the call to sleep will throw a
* {@link TimeoutException}. This can be used to simplify writing polling logic like the following
* <pre>{@code
* long timeoutMs = 60_000;
* long pollIntervalMs = 1_000;
* Alarm alarm = Alarm.timer(time, timeoutMs, () -> "Timed out waiting for processing to succeed");
* while (true) {
* // do some processing
* if (processingSuccess) {
* timeoutMs = alarm.remainingMs();
* // we might want to use the remaining timeout when
* // a single timeout is used for a sequence of polling tasks
* break;
* }
* alarm.sleep(pollIntervalMs);
* }
* }</pre>
* This logic is encapsulated in the {@link #poll(long, BooleanSupplier)} method.
*/
public class Alarm {

    /** Number of nanoseconds in one millisecond */
    private static final long NANOS_PER_MILLI = 1_000_000L;

    /** The source of time used for reading the clock and for sleeping */
    final Time time;
    /** The deadline, expressed on the {@link Time#nanoTime()} timeline of {@link #time} */
    final long deadline;
    private final Supplier<String> timeoutMessageSupplier;

    private Alarm(Time time, long deadline, Supplier<String> timeoutMessageSupplier) {
        this.time = time;
        this.deadline = deadline;
        this.timeoutMessageSupplier = timeoutMessageSupplier;
    }

    /**
     * Creates an Alarm
     * @param time The source of time
     * @param timeoutMs The timeout for this alarm, in milliseconds. Must not be negative.
     *                  Values too large to be expressed in nanoseconds are treated as the largest
     *                  representable timeout rather than overflowing.
     * @param timeoutMessageSupplier The exception message
     * @return The alarm
     * @throws IllegalArgumentException If {@code timeoutMs} is negative
     */
    public static Alarm timer(Time time, long timeoutMs, Supplier<String> timeoutMessageSupplier) {
        if (timeoutMs < 0) {
            throw new IllegalArgumentException("timeoutMs must not be negative, but was " + timeoutMs);
        }
        // The sum may wrap around numerically; that is fine because the deadline is only ever
        // compared by subtraction (deadline - now), which yields the right result despite wrapping.
        long deadline = time.nanoTime() + millisToNanos(timeoutMs);
        return new Alarm(time, deadline, timeoutMessageSupplier);
    }

    /**
     * @return The remaining number of milliseconds until the deadline passed
     */
    public long remainingMs() {
        return Math.max(deadline - time.nanoTime(), 0) / NANOS_PER_MILLI;
    }

    /**
     * Sleep the current thread for at most {@code ms} milliseconds, according to
     * (and subject to the precision and accuracy of) the configured {@link Time} instance.
     * The actual sleep time will be less than {@code ms} if using {@code ms} would exceed this
     * alarm's deadline. A value of {@code 0} returns immediately without sleeping, provided
     * the deadline has not passed.
     * The thread does not lose ownership of any monitors.
     * @param ms The number of milliseconds to sleep for. Must not be negative.
     * @throws IllegalArgumentException If {@code ms} is negative
     * @throws TimeoutException If the Alarm's deadline has passed
     * @throws InterruptedException If the current thread is interrupted
     */
    public void sleep(long ms) throws TimeoutException, InterruptedException {
        if (ms < 0) {
            throw new IllegalArgumentException("ms must not be negative, but was " + ms);
        }
        // Check the deadline on its own, so that a zero-length sleep is not mistaken for a timeout
        long remainingNs = deadline - time.nanoTime();
        if (remainingNs <= 0) {
            throw new TimeoutException(timeoutMessageSupplier.get());
        }
        // Never sleep past the deadline
        long sleepNs = Math.min(millisToNanos(ms), remainingNs);
        if (sleepNs > 0) {
            time.sleep(sleepNs / NANOS_PER_MILLI, (int) (sleepNs % NANOS_PER_MILLI));
        }
    }

    /**
     * Test {@code done} at least once, returning when it returns true, and otherwise sleeping for at most approximately
     * {@code pollIntervalMs} before repeating, throwing {@link TimeoutException} should this
     * alarm expire before {@code done} returns true.
     *
     * @param pollIntervalMs The polling interval, in milliseconds. Must be positive.
     * @param done A predicate function for detecting when the polling loop is complete.
     * @return The remaining time left for this alarm, in ms.
     * @throws IllegalArgumentException If {@code pollIntervalMs} is not positive
     * @throws UncheckedInterruptedException The thread was interrupted
     * @throws TimeoutException The {@link #remainingMs()} has reached zero.
     */
    public long poll(long pollIntervalMs, BooleanSupplier done) throws TimeoutException {
        if (pollIntervalMs <= 0) {
            throw new IllegalArgumentException("pollIntervalMs must be positive, but was " + pollIntervalMs);
        }
        try {
            while (true) {
                if (done.getAsBoolean()) {
                    return this.remainingMs();
                }
                // Throws TimeoutException once the deadline has passed, which ends the loop
                this.sleep(pollIntervalMs);
            }
        } catch (InterruptedException e) {
            throw new UncheckedInterruptedException(e);
        }
    }

    /**
     * Converts a non-negative number of milliseconds to nanoseconds, saturating at
     * {@link Long#MAX_VALUE} instead of overflowing.
     */
    private static long millisToNanos(long ms) {
        return ms > Long.MAX_VALUE / NANOS_PER_MILLI ? Long.MAX_VALUE : ms * NANOS_PER_MILLI;
    }
}
Loading
Loading