Browse Source

KAFKA-8972 (2.4 blocker): TaskManager state should always be updated after rebalance (#7620)

Currently, when we identify version probing, we return early from onAssignment and never get to update the TaskManager and general state with the new assignment. Since we do actually give out "real" assignments even during version probing, a StreamThread should take real ownership of its tasks/partitions, including cleaning them up in onPartitionsRevoked, which gets invoked when we call onLeavePrepare as part of triggering the follow-up rebalance.

Every member will always get an assignment encoded with the lowest common version, so there should be no problem decoding a version probing (VP) assignment. We should just allow onAssignment to proceed as usual so that the TaskManager is in a consistent state and knows what all its tasks/partitions are when the first rebalance completes and the next one is triggered.

Reviewers: Boyang Chen <boyang@confluent.io>, Matthias J. Sax <mjsax@apache.org>, Guozhang Wang <wangguoz@gmail.com>
pull/6329/merge
A. Sophie Blee-Goldman 5 years ago committed by Guozhang Wang
parent
commit
d61b0c131c
  1. 6
      clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java
  2. 4
      streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedStandbyTasks.java
  3. 4
      streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedStreamsTasks.java
  4. 1
      streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignor.java
  5. 10
      streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsRebalanceListener.java
  6. 13
      streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java
  7. 1
      streams/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java

6
clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java

@ -692,6 +692,12 @@ public final class ConsumerCoordinator extends AbstractCoordinator {
@Override @Override
public void onLeavePrepare() { public void onLeavePrepare() {
// Save the current Generation and use that to get the memberId, as the hb thread can change it at any time
final Generation currentGeneration = generation();
final String memberId = currentGeneration.memberId;
log.debug("Executing onLeavePrepare with generation {} and memberId {}", currentGeneration, memberId);
// we should reset assignment and trigger the callback before leaving group // we should reset assignment and trigger the callback before leaving group
Set<TopicPartition> droppedPartitions = new HashSet<>(subscriptions.assignedPartitions()); Set<TopicPartition> droppedPartitions = new HashSet<>(subscriptions.assignedPartitions());

4
streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedStandbyTasks.java

@ -33,10 +33,10 @@ class AssignedStandbyTasks extends AssignedTasks<StandbyTask> {
@Override @Override
public void shutdown(final boolean clean) { public void shutdown(final boolean clean) {
final String shutdownType = clean ? "Clean" : "Unclean"; final String shutdownType = clean ? "Clean" : "Unclean";
log.debug(shutdownType + " shutdown of all standby tasks" + "\n" + log.debug("{} shutdown of all standby tasks" + "\n" +
"non-initialized standby tasks to close: {}" + "\n" + "non-initialized standby tasks to close: {}" + "\n" +
"running standby tasks to close: {}", "running standby tasks to close: {}",
clean, created.keySet(), running.keySet()); shutdownType, created.keySet(), running.keySet());
super.shutdown(clean); super.shutdown(clean);
} }

4
streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedStreamsTasks.java

@ -494,12 +494,12 @@ class AssignedStreamsTasks extends AssignedTasks<StreamTask> implements Restorin
@Override @Override
public void shutdown(final boolean clean) { public void shutdown(final boolean clean) {
final String shutdownType = clean ? "Clean" : "Unclean"; final String shutdownType = clean ? "Clean" : "Unclean";
log.debug(shutdownType + " shutdown of all active tasks" + "\n" + log.debug("{} shutdown of all active tasks" + "\n" +
"non-initialized stream tasks to close: {}" + "\n" + "non-initialized stream tasks to close: {}" + "\n" +
"restoring tasks to close: {}" + "\n" + "restoring tasks to close: {}" + "\n" +
"running stream tasks to close: {}" + "\n" + "running stream tasks to close: {}" + "\n" +
"suspended stream tasks to close: {}", "suspended stream tasks to close: {}",
clean, created.keySet(), restoring.keySet(), running.keySet(), suspended.keySet()); shutdownType, created.keySet(), restoring.keySet(), running.keySet(), suspended.keySet());
super.shutdown(clean); super.shutdown(clean);
} }

1
streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignor.java

@ -1110,7 +1110,6 @@ public class StreamsPartitionAssignor implements ConsumerPartitionAssignor, Conf
// Check if this was a version probing rebalance and check the error code to trigger another rebalance if so // Check if this was a version probing rebalance and check the error code to trigger another rebalance if so
if (maybeUpdateSubscriptionVersion(receivedAssignmentMetadataVersion, latestCommonlySupportedVersion)) { if (maybeUpdateSubscriptionVersion(receivedAssignmentMetadataVersion, latestCommonlySupportedVersion)) {
setAssignmentErrorCode(AssignorError.VERSION_PROBING.code()); setAssignmentErrorCode(AssignorError.VERSION_PROBING.code());
return;
} }
// version 1 field // version 1 field

10
streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsRebalanceListener.java

@ -68,15 +68,7 @@ public class StreamsRebalanceListener implements ConsumerRebalanceListener {
if (streamThread.setState(State.PARTITIONS_ASSIGNED) == null) { if (streamThread.setState(State.PARTITIONS_ASSIGNED) == null) {
log.debug( log.debug(
"Skipping task creation in rebalance because we are already in {} state.", "Skipping task creation in rebalance because we are already in {} state.",
streamThread.state() streamThread.state());
);
} else if (streamThread.getAssignmentErrorCode() != AssignorError.NONE.code()) {
log.debug(
"Encountered assignment error during partition assignment: {}. Skipping task initialization and "
+ "pausing any partitions we may have been assigned.",
streamThread.getAssignmentErrorCode()
);
taskManager.pausePartitions();
} else { } else {
// Close non-reassigned tasks before initializing new ones as we may have suspended active // Close non-reassigned tasks before initializing new ones as we may have suspended active
// tasks that become standbys or vice versa // tasks that become standbys or vice versa

13
streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java

@ -467,21 +467,22 @@ public class TaskManager {
} }
log.debug("Assigning metadata with: " + log.debug("Assigning metadata with: " +
"\tactiveTasks: {},\n" + "\tpreviousAssignedActiveTasks: {},\n" +
"\tstandbyTasks: {}\n" + "\tpreviousAssignedStandbyTasks: {}\n" +
"The updated active task states are: \n" + "The updated task states are: \n" +
"\tassignedActiveTasks {},\n" + "\tassignedActiveTasks {},\n" +
"\tassignedStandbyTasks {},\n" + "\tassignedStandbyTasks {},\n" +
"\taddedActiveTasks {},\n" + "\taddedActiveTasks {},\n" +
"\taddedStandbyTasks {},\n" + "\taddedStandbyTasks {},\n" +
"\trevokedActiveTasks {},\n" + "\trevokedActiveTasks {},\n" +
"\trevokedStandbyTasks {}", "\trevokedStandbyTasks {}",
activeTasks, standbyTasks,
assignedActiveTasks, assignedStandbyTasks, assignedActiveTasks, assignedStandbyTasks,
activeTasks, standbyTasks,
addedActiveTasks, addedStandbyTasks, addedActiveTasks, addedStandbyTasks,
revokedActiveTasks, revokedStandbyTasks); revokedActiveTasks, revokedStandbyTasks);
this.assignedActiveTasks = activeTasks;
this.assignedStandbyTasks = standbyTasks; assignedActiveTasks = activeTasks;
assignedStandbyTasks = standbyTasks;
} }
public void updateSubscriptionsFromAssignment(final List<TopicPartition> partitions) { public void updateSubscriptionsFromAssignment(final List<TopicPartition> partitions) {

1
streams/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java

@ -195,7 +195,6 @@ public class StreamsUpgradeTest {
if (maybeUpdateSubscriptionVersion(usedVersion, info.commonlySupportedVersion())) { if (maybeUpdateSubscriptionVersion(usedVersion, info.commonlySupportedVersion())) {
setAssignmentErrorCode(AssignorError.VERSION_PROBING.code()); setAssignmentErrorCode(AssignorError.VERSION_PROBING.code());
usedSubscriptionMetadataVersionPeek.set(usedSubscriptionMetadataVersion); usedSubscriptionMetadataVersionPeek.set(usedSubscriptionMetadataVersion);
return;
} }
final List<TopicPartition> partitions = new ArrayList<>(assignment.partitions()); final List<TopicPartition> partitions = new ArrayList<>(assignment.partitions());

Loading…
Cancel
Save