@ -48,8 +48,9 @@ public class StoreChangelogReader implements ChangelogReader {
private final Map < TopicPartition , Long > endOffsets = new HashMap < > ( ) ;
private final Map < TopicPartition , Long > endOffsets = new HashMap < > ( ) ;
private final Map < String , List < PartitionInfo > > partitionInfo = new HashMap < > ( ) ;
private final Map < String , List < PartitionInfo > > partitionInfo = new HashMap < > ( ) ;
private final Map < TopicPartition , StateRestorer > stateRestorers = new HashMap < > ( ) ;
private final Map < TopicPartition , StateRestorer > stateRestorers = new HashMap < > ( ) ;
private final Map < TopicPartition , StateRestorer > needsRestoring = new HashMap < > ( ) ;
private final Set < TopicPartition > needsRestoring = new HashSet < > ( ) ;
private final Map < TopicPartition , StateRestorer > needsInitializing = new HashMap < > ( ) ;
private final Set < TopicPartition > needsInitializing = new HashSet < > ( ) ;
private final Set < TopicPartition > completedRestorers = new HashSet < > ( ) ;
private final Duration pollTime ;
private final Duration pollTime ;
public StoreChangelogReader ( final Consumer < byte [ ] , byte [ ] > restoreConsumer ,
public StoreChangelogReader ( final Consumer < byte [ ] , byte [ ] > restoreConsumer ,
@ -64,9 +65,14 @@ public class StoreChangelogReader implements ChangelogReader {
@Override
@Override
public void register ( final StateRestorer restorer ) {
public void register ( final StateRestorer restorer ) {
if ( ! stateRestorers . containsKey ( restorer . partition ( ) ) ) {
restorer . setUserRestoreListener ( userStateRestoreListener ) ;
restorer . setUserRestoreListener ( userStateRestoreListener ) ;
stateRestorers . put ( restorer . partition ( ) , restorer ) ;
stateRestorers . put ( restorer . partition ( ) , restorer ) ;
needsInitializing . put ( restorer . partition ( ) , restorer ) ;
log . trace ( "Added restorer for changelog {}" , restorer . partition ( ) ) ;
}
needsInitializing . add ( restorer . partition ( ) ) ;
}
}
public Collection < TopicPartition > restore ( final RestoringTasks active ) {
public Collection < TopicPartition > restore ( final RestoringTasks active ) {
@ -81,16 +87,15 @@ public class StoreChangelogReader implements ChangelogReader {
try {
try {
final ConsumerRecords < byte [ ] , byte [ ] > records = restoreConsumer . poll ( pollTime ) ;
final ConsumerRecords < byte [ ] , byte [ ] > records = restoreConsumer . poll ( pollTime ) ;
final Iterator < TopicPartition > iterator = needsRestoring . keySet ( ) . iterator ( ) ;
while ( iterator . hasNext ( ) ) {
for ( final TopicPartition partition : needsRestoring ) {
final TopicPartition partition = iterator . next ( ) ;
final StateRestorer restorer = stateRestorers . get ( partition ) ;
final StateRestorer restorer = stateRestorers . get ( partition ) ;
final long pos = processNext ( records . records ( partition ) , restorer , endOffsets . get ( partition ) ) ;
final long pos = processNext ( records . records ( partition ) , restorer , endOffsets . get ( partition ) ) ;
restorer . setRestoredOffset ( pos ) ;
restorer . setRestoredOffset ( pos ) ;
if ( restorer . hasCompleted ( pos , endOffsets . get ( partition ) ) ) {
if ( restorer . hasCompleted ( pos , endOffsets . get ( partition ) ) ) {
restorer . restoreDone ( ) ;
restorer . restoreDone ( ) ;
endOffsets . remove ( partition ) ;
endOffsets . remove ( partition ) ;
iterator . remove ( ) ;
completedRestorers . add ( partition ) ;
}
}
}
}
} catch ( final InvalidOffsetException recoverableException ) {
} catch ( final InvalidOffsetException recoverableException ) {
@ -98,12 +103,18 @@ public class StoreChangelogReader implements ChangelogReader {
final Set < TopicPartition > partitions = recoverableException . partitions ( ) ;
final Set < TopicPartition > partitions = recoverableException . partitions ( ) ;
for ( final TopicPartition partition : partitions ) {
for ( final TopicPartition partition : partitions ) {
final StreamTask task = active . restoringTaskFor ( partition ) ;
final StreamTask task = active . restoringTaskFor ( partition ) ;
log . info ( "Reinitializing StreamTask {}" , task ) ;
log . info ( "Reinitializing StreamTask {} for changelog {}" , task , partition ) ;
needsInitializing . remove ( partition ) ;
needsRestoring . remove ( partition ) ;
task . reinitializeStateStoresForPartitions ( recoverableException . partitions ( ) ) ;
task . reinitializeStateStoresForPartitions ( recoverableException . partitions ( ) ) ;
}
}
restoreConsumer . seekToBeginning ( partitions ) ;
restoreConsumer . seekToBeginning ( partitions ) ;
}
}
needsRestoring . removeAll ( completedRestorers ) ;
if ( needsRestoring . isEmpty ( ) ) {
if ( needsRestoring . isEmpty ( ) ) {
restoreConsumer . unsubscribe ( ) ;
restoreConsumer . unsubscribe ( ) ;
}
}
@ -120,25 +131,24 @@ public class StoreChangelogReader implements ChangelogReader {
// the needsInitializing map is not empty, meaning we do not know the metadata for some of them yet
// the needsInitializing map is not empty, meaning we do not know the metadata for some of them yet
refreshChangelogInfo ( ) ;
refreshChangelogInfo ( ) ;
final Map < TopicPartition , StateRestorer > initializable = new HashMap < > ( ) ;
final Set < TopicPartition > initializable = new HashSet < > ( ) ;
for ( final Map . Entry < TopicPartition , StateRestorer > entry : needsInitializing . entrySet ( ) ) {
for ( final TopicPartition topicPartition : needsInitializing ) {
final TopicPartition topicPartition = entry . getKey ( ) ;
if ( hasPartition ( topicPartition ) ) {
if ( hasPartition ( topicPartition ) ) {
initializable . put ( entry . getKey ( ) , entry . getValue ( ) ) ;
initializable . add ( topicPartition ) ;
}
}
}
}
// try to fetch end offsets for the initializable restorers and remove any partitions
// try to fetch end offsets for the initializable restorers and remove any partitions
// where we already have all of the data
// where we already have all of the data
try {
try {
endOffsets . putAll ( restoreConsumer . endOffsets ( initializable . keySet ( ) ) ) ;
endOffsets . putAll ( restoreConsumer . endOffsets ( initializable ) ) ;
} catch ( final TimeoutException e ) {
} catch ( final TimeoutException e ) {
// if timeout exception gets thrown we just give up this time and retry in the next run loop
// if timeout exception gets thrown we just give up this time and retry in the next run loop
log . debug ( "Could not fetch end offset for {}; will fall back to partition by partition fetching" , initializable ) ;
log . debug ( "Could not fetch end offset for {}; will fall back to partition by partition fetching" , initializable ) ;
return ;
return ;
}
}
final Iterator < TopicPartition > iter = initializable . keySet ( ) . iterator ( ) ;
final Iterator < TopicPartition > iter = initializable . iterator ( ) ;
while ( iter . hasNext ( ) ) {
while ( iter . hasNext ( ) ) {
final TopicPartition topicPartition = iter . next ( ) ;
final TopicPartition topicPartition = iter . next ( ) ;
final Long endOffset = endOffsets . get ( topicPartition ) ;
final Long endOffset = endOffsets . get ( topicPartition ) ;
@ -146,13 +156,15 @@ public class StoreChangelogReader implements ChangelogReader {
// offset should not be null; but since the consumer API does not guarantee it
// offset should not be null; but since the consumer API does not guarantee it
// we add this check just in case
// we add this check just in case
if ( endOffset ! = null ) {
if ( endOffset ! = null ) {
final StateRestorer restorer = needsInitializing . get ( topicPartition ) ;
final StateRestorer restorer = stateRestorers . get ( topicPartition ) ;
if ( restorer . checkpoint ( ) > = endOffset ) {
if ( restorer . checkpoint ( ) > = endOffset ) {
restorer . setRestoredOffset ( restorer . checkpoint ( ) ) ;
restorer . setRestoredOffset ( restorer . checkpoint ( ) ) ;
iter . remove ( ) ;
iter . remove ( ) ;
completedRestorers . add ( topicPartition ) ;
} else if ( restorer . offsetLimit ( ) = = 0 | | endOffset = = 0 ) {
} else if ( restorer . offsetLimit ( ) = = 0 | | endOffset = = 0 ) {
restorer . setRestoredOffset ( 0 ) ;
restorer . setRestoredOffset ( 0 ) ;
iter . remove ( ) ;
iter . remove ( ) ;
completedRestorers . add ( topicPartition ) ;
} else {
} else {
restorer . setEndingOffset ( endOffset ) ;
restorer . setEndingOffset ( endOffset ) ;
}
}
@ -169,42 +181,49 @@ public class StoreChangelogReader implements ChangelogReader {
}
}
}
}
private void startRestoration ( final Map < TopicPartition , StateRestorer > initialized ,
private void startRestoration ( final Set < TopicPartition > initialized ,
final RestoringTasks active ) {
final RestoringTasks active ) {
log . debug ( "Start restoring state stores from changelog topics {}" , initialized . keySet ( ) ) ;
log . debug ( "Start restoring state stores from changelog topics {}" , initialized ) ;
final Set < TopicPartition > assignment = new HashSet < > ( restoreConsumer . assignment ( ) ) ;
final Set < TopicPartition > assignment = new HashSet < > ( restoreConsumer . assignment ( ) ) ;
assignment . addAll ( initialized . keySet ( ) ) ;
assignment . addAll ( initialized ) ;
restoreConsumer . assign ( assignment ) ;
restoreConsumer . assign ( assignment ) ;
final List < StateRestorer > needsPositionUpdate = new ArrayList < > ( ) ;
final List < StateRestorer > needsPositionUpdate = new ArrayList < > ( ) ;
for ( final StateRestorer restorer : initialized . values ( ) ) {
for ( final TopicPartition partition : initialized ) {
final StateRestorer restorer = stateRestorers . get ( partition ) ;
if ( restorer . checkpoint ( ) ! = StateRestorer . NO_CHECKPOINT ) {
if ( restorer . checkpoint ( ) ! = StateRestorer . NO_CHECKPOINT ) {
restoreConsumer . seek ( restorer . partition ( ) , restorer . checkpoint ( ) ) ;
restoreConsumer . seek ( partition , restorer . checkpoint ( ) ) ;
logRestoreOffsets ( restorer . partition ( ) ,
logRestoreOffsets ( partition ,
restorer . checkpoint ( ) ,
restorer . checkpoint ( ) ,
endOffsets . get ( restorer . partition ( ) ) ) ;
endOffsets . get ( partition ) ) ;
restorer . setStartingOffset ( restoreConsumer . position ( restorer . partition ( ) ) ) ;
restorer . setStartingOffset ( restoreConsumer . position ( partition ) ) ;
restorer . restoreStarted ( ) ;
restorer . restoreStarted ( ) ;
} else {
} else {
final StreamTask task = active . restoringTaskFor ( restorer . partition ( ) ) ;
restoreConsumer . seekToBeginning ( Collections . singletonList ( partition ) ) ;
needsPositionUpdate . add ( restorer ) ;
}
}
for ( final StateRestorer restorer : needsPositionUpdate ) {
final TopicPartition partition = restorer . partition ( ) ;
// If checkpoint does not exist it means the task was not shutdown gracefully before;
// If checkpoint does not exist it means the task was not shutdown gracefully before;
// and in this case if EOS is turned on we should wipe out the state and re-initialize the task
// and in this case if EOS is turned on we should wipe out the state and re-initialize the task
final StreamTask task = active . restoringTaskFor ( partition ) ;
if ( task . isEosEnabled ( ) ) {
if ( task . isEosEnabled ( ) ) {
log . info ( "No checkpoint found for task {} state store {} changelog {} with EOS turned on. " +
log . info ( "No checkpoint found for task {} state store {} changelog {} with EOS turned on. " +
"Reinitializing the task and restore its state from the beginning." , task . id , restorer . storeName ( ) , restorer . partition ( ) ) ;
"Reinitializing the task and restore its state from the beginning." , task . id , restorer . storeName ( ) , partition ) ;
task . reinitializeStateStoresForPartitions ( Collections . singleton ( restorer . partition ( ) ) ) ;
} else {
log . info ( "Restoring task {}'s state store {} from beginning of the changelog {} " , task . id , restorer . storeName ( ) , restorer . partition ( ) ) ;
}
restoreConsumer . seekToBeginning ( Collections . singletonList ( restorer . partition ( ) ) ) ;
needsInitializing . remove ( partition ) ;
needsPositionUpdate . add ( restorer ) ;
initialized . remove ( partition ) ;
}
restorer . setCheckpointOffset ( restoreConsumer . position ( partition ) ) ;
}
task . reinitializeStateStoresForPartitions ( Collections . singleton ( partition ) ) ;
} else {
log . info ( "Restoring task {}'s state store {} from beginning of the changelog {} " , task . id , restorer . storeName ( ) , partition ) ;
for ( final StateRestorer restorer : needsPositionUpdate ) {
final long position = restoreConsumer . position ( restorer . partition ( ) ) ;
final long position = restoreConsumer . position ( restorer . partition ( ) ) ;
logRestoreOffsets ( restorer . partition ( ) ,
logRestoreOffsets ( restorer . partition ( ) ,
position ,
position ,
@ -212,8 +231,9 @@ public class StoreChangelogReader implements ChangelogReader {
restorer . setStartingOffset ( position ) ;
restorer . setStartingOffset ( position ) ;
restorer . restoreStarted ( ) ;
restorer . restoreStarted ( ) ;
}
}
}
needsRestoring . put All( initialized ) ;
needsRestoring . add All( initialized ) ;
}
}
private void logRestoreOffsets ( final TopicPartition partition ,
private void logRestoreOffsets ( final TopicPartition partition ,
@ -226,10 +246,7 @@ public class StoreChangelogReader implements ChangelogReader {
}
}
private Collection < TopicPartition > completed ( ) {
private Collection < TopicPartition > completed ( ) {
final Set < TopicPartition > completed = new HashSet < > ( stateRestorers . keySet ( ) ) ;
return completedRestorers ;
completed . removeAll ( needsRestoring . keySet ( ) ) ;
log . trace ( "The set of restoration completed partitions so far: {}" , completed ) ;
return completed ;
}
}
private void refreshChangelogInfo ( ) {
private void refreshChangelogInfo ( ) {