From 96bc0b882d0c51d9b58c9f87654e6d133fd9ef34 Mon Sep 17 00:00:00 2001 From: Lucas Wang Date: Sun, 29 Jul 2018 21:06:18 -0700 Subject: [PATCH] KAFKA-7180; Fixing the flaky test testHWCheckpointWithFailuresSingleLogSegment By waiting until server1 has joined the ISR before shutting down server2. Reran the test method many times after the code change, and the flakiness no longer occurs. Author: Lucas Wang Reviewers: Mayuresh Gharat, Dong Lin Closes #5387 from gitlw/fixing_flacky_logrecevorytest --- .../test/scala/unit/kafka/server/LogRecoveryTest.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala b/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala index 880950ae02c..1bd15f7b537 100755 --- a/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala +++ b/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala @@ -143,6 +143,15 @@ class LogRecoveryTest extends ZooKeeperTestHarness { leader == 0 || leader == 1) assertEquals(hw, hwFile1.read.getOrElse(topicPartition, 0L)) + /** We plan to shutdown server2 and transfer the leadership to server1. + * With unclean leader election turned off, a prerequisite for the successful leadership transition + * is that server1 has caught up on the topicPartition, and has joined the ISR. + * In the line below, we wait until the condition is met before shutting down server2 + */ + waitUntilTrue(() => server2.replicaManager.getPartition(topicPartition).get.inSyncReplicas.size == 2, + "Server 1 is not able to join the ISR after restart") + + // since server 2 was never shut down, the hw value of 30 is probably not checkpointed to disk yet server2.shutdown() assertEquals(hw, hwFile2.read.getOrElse(topicPartition, 0L))