@ -152,26 +152,28 @@ class GroupCoordinator(val brokerId: Int,
@@ -152,26 +152,28 @@ class GroupCoordinator(val brokerId: Int,
}
}
def handleJoinGroup ( groupId : String ,
memberId : String ,
groupInstanceId : Option [ String ] ,
requireKnownMemberId : Boolean ,
clientId : String ,
clientHost : String ,
def handleJoinGroup ( groupId : String , // 消费者groupId
memberId : String , // member id of the joining consumer (may be JoinGroupRequest.UNKNOWN_MEMBER_ID)
groupInstanceId : Option [ String ] , // 消费者组实例id
requireKnownMemberId : Boolean , // 是否需要成员ID不为空
clientId : String , // 消费者clientId
clientHost : String , // 消费者主机名
rebalanceTimeoutMs : Int ,
sessionTimeoutMs : Int ,
protocolType : String ,
protocols : List [ ( String , Array [ Byte ] ) ] ,
responseCallback : JoinCallback ) : Unit = {
// 校验入参groupId及状态
validateGroupStatus ( groupId , ApiKeys . JOIN_GROUP ) . foreach { error =>
responseCallback ( JoinGroupResult ( memberId , error ) )
return
}
// 校验入参sessionTimeoutMs
if ( sessionTimeoutMs < groupConfig . groupMinSessionTimeoutMs ||
sessionTimeoutMs > groupConfig . groupMaxSessionTimeoutMs ) {
responseCallback ( JoinGroupResult ( memberId , Errors . INVALID_SESSION_TIMEOUT ) )
} else {
// 消费者组成员ID是否为空
val isUnknownMember = memberId == JoinGroupRequest . UNKNOWN_MEMBER_ID
// group is created if it does not exist and the member id is UNKNOWN . if member
// is specified but group does not exist , request is rejected with UNKNOWN_MEMBER_ID
@ -180,12 +182,17 @@ class GroupCoordinator(val brokerId: Int,
@@ -180,12 +182,17 @@ class GroupCoordinator(val brokerId: Int,
responseCallback ( JoinGroupResult ( memberId , Errors . UNKNOWN_MEMBER_ID ) )
case Some ( group ) =>
group . inLock {
// 判断消费者组是否有足够空间接受当前成员
if ( ! acceptJoiningMember ( group , memberId ) ) {
group . remove ( memberId )
responseCallback ( JoinGroupResult ( JoinGroupRequest . UNKNOWN_MEMBER_ID , Errors . GROUP_MAX_SIZE_REACHED ) )
} else if ( isUnknownMember ) {
}
// 安排空id成员入组
else if ( isUnknownMember ) {
doUnknownJoinGroup ( group , groupInstanceId , requireKnownMemberId , clientId , clientHost , rebalanceTimeoutMs , sessionTimeoutMs , protocolType , protocols , responseCallback )
} else {
}
// 安排非空id成员入组
else {
doJoinGroup ( group , memberId , groupInstanceId , clientId , clientHost , rebalanceTimeoutMs , sessionTimeoutMs , protocolType , protocols , responseCallback )
}
@ -209,20 +216,35 @@ class GroupCoordinator(val brokerId: Int,
@@ -209,20 +216,35 @@ class GroupCoordinator(val brokerId: Int,
protocols : List [ ( String , Array [ Byte ] ) ] ,
responseCallback : JoinCallback ) : Unit = {
group . inLock {
// 如果消费者组状态为Dead
if ( group . is ( Dead ) ) {
// if the group is marked as dead , it means some other thread has just removed the group
// from the coordinator metadata ; it is likely that the group has migrated to some other
// coordinator OR the group is in a transient unstable phase . Let the member retry
// finding the correct coordinator and rejoin .
responseCallback ( JoinGroupResult ( JoinGroupRequest . UNKNOWN_MEMBER_ID , Errors . COORDINATOR_NOT_AVAILABLE ) )
} else if ( ! group . supportsProtocols ( protocolType , MemberMetadata . plainProtocolSet ( protocols ) ) ) {
}
// 检查消费者版本和策略
// 如果成员配置的协议类型 / 分区消费分配策略与消费者组的不匹配 , 封装INCONSISTENT_GROUP_PROTOCOL异常并调用回调函数返回
// 这里需要注意一点 : 新加入成员的设置的分区分配策略 , 必须至少有一个策略是组内所有成员都支持的 , 因为消费者组选举分区分配策略时
// 第一步就是要获取所有成员都支持的分区分配策略 , 否则无法选举
else if ( ! group . supportsProtocols ( protocolType , MemberMetadata . plainProtocolSet ( protocols ) ) ) {
responseCallback ( JoinGroupResult ( JoinGroupRequest . UNKNOWN_MEMBER_ID , Errors . INCONSISTENT_GROUP_PROTOCOL ) )
} else {
val newMemberId = group . generateMemberId ( clientId , groupInstanceId )
}
else {
// 服务端生成memberId , 用 `client.id-${UUID}` 或者 `groupInstanceId.id-${UUID}` 拼接而成
val newMemberId = group . generateMemberId ( clientId , groupInstanceId )
// 添加静态成员 , 并触发rebalance
if ( group . hasStaticMember ( groupInstanceId ) ) {
updateStaticMemberAndRebalance ( group , newMemberId , groupInstanceId , protocols , responseCallback )
} else if ( requireKnownMemberId ) {
}
// 如果要求成员ID不为空 , 默认为true
// 当满足条件 joinGroupRequest . version >= 4 && groupInstanceId . isEmpty , requireKnownMemberId为true
else if ( requireKnownMemberId ) {
// 如果申请加入组的成员 memberId 为空 , 服务端会先生成一个memberId然后将该请求 "打回去" , 携带生成的 memberId 和 MEMBER_ID_REQUIRED 异常信息 。
// 当客户端收到包含该异常信息的响应 , 会根据返回的 memberId更新自身的信息 , 并重新发送 JoinGroupRequest , 之后就会调用 doJoinGroup 方法了
// If member id required ( dynamic membership ) , register the member in the pending member list
// and send back a response to call for another join group request with allocated member id .
debug ( s" Dynamic member with unknown member id joins group ${ group . groupId } in " +
@ -230,7 +252,9 @@ class GroupCoordinator(val brokerId: Int,
@@ -230,7 +252,9 @@ class GroupCoordinator(val brokerId: Int,
group . addPendingMember ( newMemberId )
addPendingMemberExpiration ( group , newMemberId , sessionTimeoutMs )
responseCallback ( JoinGroupResult ( newMemberId , Errors . MEMBER_ID_REQUIRED ) )
} else {
}
// 增加member并触发rebalance
else {
info ( s" ${ if ( groupInstanceId . isDefined ) "Static" else "Dynamic" } Member with unknown member id joins group ${ group . groupId } in " +
s" ${ group . currentState } state. Created a new member id $newMemberId for this member and add to the group. " )
addMemberAndRebalance ( rebalanceTimeoutMs , sessionTimeoutMs , newMemberId , groupInstanceId ,
@ -240,6 +264,7 @@ class GroupCoordinator(val brokerId: Int,
@@ -240,6 +264,7 @@ class GroupCoordinator(val brokerId: Int,
}
}
// 安排设置了memberId的消费者加入组
private def doJoinGroup ( group : GroupMetadata ,
memberId : String ,
groupInstanceId : Option [ String ] ,
@ -251,6 +276,7 @@ class GroupCoordinator(val brokerId: Int,
@@ -251,6 +276,7 @@ class GroupCoordinator(val brokerId: Int,
protocols : List [ ( String , Array [ Byte ] ) ] ,
responseCallback : JoinCallback ) : Unit = {
group . inLock {
// 前置检查 , 各类异常情况
if ( group . is ( Dead ) ) {
// if the group is marked as dead , it means some other thread has just removed the group
// from the coordinator metadata ; this is likely that the group has migrated to some other
@ -259,7 +285,9 @@ class GroupCoordinator(val brokerId: Int,
@@ -259,7 +285,9 @@ class GroupCoordinator(val brokerId: Int,
responseCallback ( JoinGroupResult ( memberId , Errors . COORDINATOR_NOT_AVAILABLE ) )
} else if ( ! group . supportsProtocols ( protocolType , MemberMetadata . plainProtocolSet ( protocols ) ) ) {
responseCallback ( JoinGroupResult ( memberId , Errors . INCONSISTENT_GROUP_PROTOCOL ) )
} else if ( group . isPendingMember ( memberId ) ) {
}
// 第一阶段 , 处理 `待决成员` 入组申请
else if ( group . isPendingMember ( memberId ) ) {
// A rejoining pending member will be accepted . Note that pending member will never be a static member .
if ( groupInstanceId . isDefined ) {
throw new IllegalStateException ( s" the static member $groupInstanceId was not expected to be assigned " +
@ -267,31 +295,50 @@ class GroupCoordinator(val brokerId: Int,
@@ -267,31 +295,50 @@ class GroupCoordinator(val brokerId: Int,
} else {
debug ( s" Dynamic Member with specific member id $memberId joins group ${ group . groupId } in " +
s" ${ group . currentState } state. Adding to the group now. " )
// `待决成员` 加入组
addMemberAndRebalance ( rebalanceTimeoutMs , sessionTimeoutMs , memberId , groupInstanceId ,
clientId , clientHost , protocolType , protocols , group , responseCallback )
}
} else {
}
// 第二阶段 , 处理 `非待决成员` 的入组申请
else {
// 消费者组实例id没找到
val groupInstanceIdNotFound = groupInstanceId . isDefined && ! group . hasStaticMember ( groupInstanceId )
// 检查static member . id是否最新
if ( group . isStaticMemberFenced ( memberId , groupInstanceId , "join-group" ) ) {
// given member id doesn 't match with the groupInstanceId . Inform duplicate instance to shut down immediately .
responseCallback ( JoinGroupResult ( memberId , Errors . FENCED_INSTANCE_ID ) )
} else if ( ! group . has ( memberId ) || groupInstanceIdNotFound ) {
}
// memberId找不到 , 失败
else if ( ! group . has ( memberId ) || groupInstanceIdNotFound ) {
// If the dynamic member trying to register with an unrecognized id , or
// the static member joins with unknown group instance id , send the response to let
// it reset its member id and retry .
responseCallback ( JoinGroupResult ( memberId , Errors . UNKNOWN_MEMBER_ID ) )
} else {
}
// member is known to the group: handle the rejoin according to the current group state
else {
// 获取成员的元数据
val member = group . get ( memberId )
group . currentState match {
// 如果是PreparingRebalance状态 , 就说明消费者组正要开启 Rebalance 流程 ,
// 那么 , 调用 updateMemberAndRebalance 方法更新成员信息 , 并开始准备 Rebalance 即可 。
// 更新成员信息并开始准备Rebalance
// GroupCoordinator状态机 : PreparingRebalance => PreparingRebalance
case PreparingRebalance =>
updateMemberAndRebalance ( group , member , protocols , s" Member ${ member . memberId } joining group during ${ group . currentState } " , responseCallback )
// GroupCoordinator状态机 : CompletingRebalance => PreparingRebalance
case CompletingRebalance =>
// 如果成员以前申请过加入组
if ( member . matches ( protocols ) ) {
// member is joining with the same metadata ( which could be because it failed to
// receive the initial JoinGroup response ) , so just return current group information
// for the current generation .
// 就判断一下 , 该成员的分区消费分配策略与订阅分区列表是否和已保存记录中的一致 ,
// 如果相同 , 就说明该成员已经应该发起过加入组的操作 , 并且 Coordinator 已经批准了 , 只是该成员没有收到 ,
// 因此 , 针对这种情况 , 代码构造一个 JoinGroupResult 对象 , 直接返回当前的组信息给成员 。
responseCallback ( JoinGroupResult (
members = if ( group . isLeader ( memberId ) ) {
group . currentMemberMetadata
@ -306,19 +353,27 @@ class GroupCoordinator(val brokerId: Int,
@@ -306,19 +353,27 @@ class GroupCoordinator(val brokerId: Int,
error = Errors . NONE ) )
} else {
// member has changed metadata , so force a rebalance
// 否则 , 就说明成员变更了订阅信息或分配策略 , 更新成员信息并开始准备Rebalance
updateMemberAndRebalance ( group , member , protocols , s" Updating metadata for member ${ member . memberId } during ${ group . currentState } " , responseCallback )
}
// Stable状态
// GroupCoordinator状态机 : Stable => PreparingRebalance
case Stable =>
val member = group . get ( memberId )
// 如果成员是Leader成员 , 强制Rebalance
if ( group . isLeader ( memberId ) ) {
// force a rebalance if the leader sends JoinGroup ;
// This allows the leader to trigger rebalances for changes affecting assignment
// which do not affect the member metadata ( such as topic metadata changes for the consumer )
updateMemberAndRebalance ( group , member , protocols , s" leader ${ member . memberId } re-joining group during ${ group . currentState } " , responseCallback )
} else if ( ! member . matches ( protocols ) ) {
}
// 如果成员元数据发生变更 , 强制Rebalance
else if ( ! member . matches ( protocols ) ) {
updateMemberAndRebalance ( group , member , protocols , s" Updating metadata for member ${ member . memberId } during ${ group . currentState } " , responseCallback )
} else {
}
// 如果不属于上述2种情况 , 仅返回当前组信息
else {
// for followers with no actual change to their metadata , just return group information
// for the current generation which will allow them to issue SyncGroup
responseCallback ( JoinGroupResult (
@ -331,6 +386,7 @@ class GroupCoordinator(val brokerId: Int,
@@ -331,6 +386,7 @@ class GroupCoordinator(val brokerId: Int,
error = Errors . NONE ) )
}
// 异常状态 , 返回
case Empty | Dead =>
// Group reaches unexpected state . Let the joining member reset their generation and rejoin .
warn ( s" Attempt to add rejoining member $memberId of group ${ group . groupId } in " +
@ -342,13 +398,14 @@ class GroupCoordinator(val brokerId: Int,
@@ -342,13 +398,14 @@ class GroupCoordinator(val brokerId: Int,
}
}
// 处理SYNC_GROUP请求
def handleSyncGroup ( groupId : String ,
generation : Int ,
memberId : String ,
protocolType : Option [ String ] ,
protocolName : Option [ String ] ,
groupInstanceId : Option [ String ] ,
groupAssignment : Map [ String , Array [ Byte ] ] ,
groupAssignment : Map [ String , Array [ Byte ] ] , // 消费者提交的分区分配方案
responseCallback : SyncCallback ) : Unit = {
validateGroupStatus ( groupId , ApiKeys . SYNC_GROUP ) match {
case Some ( error ) if error == Errors . COORDINATOR_LOAD_IN_PROGRESS =>
@ -361,8 +418,10 @@ class GroupCoordinator(val brokerId: Int,
@@ -361,8 +418,10 @@ class GroupCoordinator(val brokerId: Int,
case Some ( error ) => responseCallback ( SyncGroupResult ( error ) )
case None =>
// 获取组元素对象
groupManager . getGroup ( groupId ) match {
case None => responseCallback ( SyncGroupResult ( Errors . UNKNOWN_MEMBER_ID ) )
// 执行doSyncGroup
case Some ( group ) => doSyncGroup ( group , generation , memberId , protocolType , protocolName ,
groupInstanceId , groupAssignment , responseCallback )
}
@ -402,6 +461,9 @@ class GroupCoordinator(val brokerId: Int,
@@ -402,6 +461,9 @@ class GroupCoordinator(val brokerId: Int,
case PreparingRebalance =>
responseCallback ( SyncGroupResult ( Errors . REBALANCE_IN_PROGRESS ) )
// 正常场景 , 收到leader的sync_group请求后
// 1. 调用GroupMetadataManager . storeGroup保存组信息
// 2. group状态更新到Stable
case CompletingRebalance =>
group . get ( memberId ) . awaitingSyncCallback = responseCallback
removePendingSyncMember ( group , memberId )
@ -411,6 +473,7 @@ class GroupCoordinator(val brokerId: Int,
@@ -411,6 +473,7 @@ class GroupCoordinator(val brokerId: Int,
info ( s" Assignment received from leader for group ${ group . groupId } for generation ${ group . generationId } . " +
s" The group has ${ group . size } members, ${ group . allStaticMembers . size } of which are static. " )
// 补全缺失的member和assignments
// fill any missing members with an empty assignment
val missing = group . allMembers . diff ( groupAssignment . keySet )
val assignment = groupAssignment ++ missing . map ( _ -> Array . empty [ Byte ] ) . toMap
@ -419,17 +482,25 @@ class GroupCoordinator(val brokerId: Int,
@@ -419,17 +482,25 @@ class GroupCoordinator(val brokerId: Int,
warn ( s" Setting empty assignments for members $missing of ${ group . groupId } for generation ${ group . generationId } " )
}
// 调用GroupMetadataManager . storeGroup保存组信息
groupManager . storeGroup ( group , assignment , ( error : Errors ) => {
group . inLock {
// another member may have joined the group while we were awaiting this callback ,
// so we must ensure we are still in the CompletingRebalance state and the same generation
// when it gets invoked . if we have transitioned to another state , then do nothing
if ( group . is ( CompletingRebalance ) && generationId == group . generationId ) {
// 有错误
if ( error != Errors . NONE ) {
// 清空分配方案并发送给所有成员
resetAndPropagateAssignmentError ( group , error )
// 准备开启新一轮的Rebalance
maybePrepareRebalance ( group , s" error when storing group assignment during SyncGroup (member: $memberId ) " )
} else {
}
// 没有错误 ,
else {
// 在消费者组元数据中为每个消费者成员保存分配方案并发送给所有成员
setAndPropagateAssignment ( group , assignment )
// group状态更新到Stable
group . transitionTo ( Stable )
}
}
@ -606,11 +677,13 @@ class GroupCoordinator(val brokerId: Int,
@@ -606,11 +677,13 @@ class GroupCoordinator(val brokerId: Int,
groupError -> partitionErrors
}
// GroupCoordinator处理Heartbeat
def handleHeartbeat ( groupId : String ,
memberId : String ,
groupInstanceId : Option [ String ] ,
generationId : Int ,
responseCallback : Errors => Unit ) : Unit = {
// validate the request against the coordinator/group status; COORDINATOR_LOAD_IN_PROGRESS is handled specially below
validateGroupStatus ( groupId , ApiKeys . HEARTBEAT ) . foreach { error =>
if ( error == Errors . COORDINATOR_LOAD_IN_PROGRESS )
// the group is still loading , so respond just blindly
@ -620,11 +693,13 @@ class GroupCoordinator(val brokerId: Int,
@@ -620,11 +693,13 @@ class GroupCoordinator(val brokerId: Int,
return
}
// look up the group metadata; a group unknown to this coordinator is answered with UNKNOWN_MEMBER_ID
groupManager . getGroup ( groupId ) match {
case None =>
responseCallback ( Errors . UNKNOWN_MEMBER_ID )
case Some ( group ) => group . inLock {
// 异常情况
if ( group . is ( Dead ) ) {
// if the group is marked as dead , it means some other thread has just removed the group
// from the coordinator metadata ; this is likely that the group has migrated to some other
@ -637,7 +712,10 @@ class GroupCoordinator(val brokerId: Int,
@@ -637,7 +712,10 @@ class GroupCoordinator(val brokerId: Int,
responseCallback ( Errors . UNKNOWN_MEMBER_ID )
} else if ( generationId != group . generationId ) {
responseCallback ( Errors . ILLEGAL_GENERATION )
} else {
}
// 正常情况下都会标记heartbeat成功
else {
group . currentState match {
case Empty =>
responseCallback ( Errors . UNKNOWN_MEMBER_ID )
@ -649,6 +727,7 @@ class GroupCoordinator(val brokerId: Int,
@@ -649,6 +727,7 @@ class GroupCoordinator(val brokerId: Int,
completeAndScheduleNextHeartbeatExpiration ( group , member )
responseCallback ( Errors . NONE )
// PreparingRebalance返回REBALANCE_IN_PROGRESS , 客户端也会按正常返回处理
case PreparingRebalance =>
val member = group . get ( memberId )
completeAndScheduleNextHeartbeatExpiration ( group , member )
@ -928,31 +1007,40 @@ class GroupCoordinator(val brokerId: Int,
@@ -928,31 +1007,40 @@ class GroupCoordinator(val brokerId: Int,
/**
 * Records the given assignment on every member of the group and then delivers
 * it to the members with `Errors.NONE`.
 *
 * The group must be in the CompletingRebalance state (asserted). The assignment
 * is looked up with `Map.apply` per member id, so it is expected to contain an
 * entry for every current member of the group.
 *
 * @param group      the group whose members receive their assignment
 * @param assignment serialized assignment keyed by member id
 */
private def setAndPropagateAssignment(group: GroupMetadata, assignment: Map[String, Array[Byte]]): Unit = {
  assert(group.is(CompletingRebalance))
  // store each member's own slice of the assignment on its metadata
  for (memberMetadata <- group.allMemberMetadata) {
    val memberAssignment = assignment(memberMetadata.memberId)
    memberMetadata.assignment = memberAssignment
  }
  // hand the stored assignments out to the members, signalling success
  propagateAssignment(group, Errors.NONE)
}
/**
 * Clears the assignment of every member of the group and then delivers the
 * given error to the members.
 *
 * The group must be in the CompletingRebalance state (asserted).
 *
 * @param group the group whose member assignments are reset
 * @param error the error passed on to the members
 */
private def resetAndPropagateAssignmentError(group: GroupMetadata, error: Errors): Unit = {
  assert(group.is(CompletingRebalance))
  // replace every member's assignment with an empty array
  for (memberMetadata <- group.allMemberMetadata)
    memberMetadata.assignment = Array.empty
  // pass the error, together with the now-empty assignments, to the members
  propagateAssignment(group, error)
}
// 分区分配方案返回给group内所有member
private def propagateAssignment ( group : GroupMetadata , error : Errors ) : Unit = {
val ( protocolType , protocolName ) = if ( error == Errors . NONE )
( group . protocolType , group . protocolName )
else
( None , None )
// deliver the result to each member of the group in turn
for ( member <- group . allMemberMetadata ) {
if ( member . assignment . isEmpty && error == Errors . NONE ) {
warn ( s" Sending empty assignment to member ${ member . memberId } of ${ group . groupId } for generation ${ group . generationId } with no errors " )
}
// 调用回调函数member . awaitingSyncCallback , 每个消费者只会收到自己的分区分配方案
if ( group . maybeInvokeSyncCallback ( member , SyncGroupResult ( protocolType , protocolName , member . assignment , error ) ) ) {
// reset the session timeout for members after propagating the member 's assignment .
// This is because if any member 's session expired while we were still awaiting either
// the leader sync group or the storage callback , its expiration will be ignored and no
// future heartbeat expectations will not be scheduled .
// 如果返回true , 则设置下次心跳的时间
completeAndScheduleNextHeartbeatExpiration ( group , member )
}
}
@ -965,13 +1053,16 @@ class GroupCoordinator(val brokerId: Int,
@@ -965,13 +1053,16 @@ class GroupCoordinator(val brokerId: Int,
completeAndScheduleNextExpiration ( group , member , member . sessionTimeoutMs )
}
// 完成当前心跳 , 并设置下次心跳的超时时间
private def completeAndScheduleNextExpiration ( group : GroupMetadata , member : MemberMetadata , timeoutMs : Long ) : Unit = {
val memberKey = MemberKey ( group . groupId , member . memberId )
// 完成本次心跳
// complete current heartbeat expectation
member . heartbeatSatisfied = true
heartbeatPurgatory . checkAndComplete ( memberKey )
// 设置下次心跳 , 超时时间timeoutMs
// reschedule the next heartbeat expiration deadline
member . heartbeatSatisfied = false
val delayedHeartbeat = new DelayedHeartbeat ( this , group , member . memberId , isPending = false , timeoutMs )
@ -1003,15 +1094,23 @@ class GroupCoordinator(val brokerId: Int,
@@ -1003,15 +1094,23 @@ class GroupCoordinator(val brokerId: Int,
protocols : List [ ( String , Array [ Byte ] ) ] ,
group : GroupMetadata ,
callback : JoinCallback ) : Unit = {
val member = new MemberMetadata ( memberId , groupInstanceId , clientId , clientHost ,
rebalanceTimeoutMs , sessionTimeoutMs , protocolType , protocols )
// 标识该成员是新成员 , isNew 字段与心跳设置相关联
member . isNew = true
// update the newMemberAdded flag to indicate that the join group can be further delayed
// 组状态是PreparingRebalance且generationId == 0 , 说明是第一次进行Rebalance , 那么设置newMemberAdded = true
// 这个变量的作用 , 是 Kafka 为消费者组 Rebalance 流程做的一个性能优化 。
// 大致的思想 : 消费者组首次进行 Rebalance 时 , 让 Coordinator 多等待一段时间 , 从而让更多的消费者组成员加入到组中 ,
// 以免后来者申请入组而反复进行 Rebalance 。 这段多等待的时间 , 由服务端参数 group . initial . rebalance . delay . ms 设置 。
if ( group . is ( PreparingRebalance ) && group . generationId == 0 )
group . newMemberAdded = true
// 向消费者组添加成员
// 如果还没有选出Leader成员 , 则设置当前成员为Leader ( 重要操作 )
group . add ( member , callback )
// The session timeout does not affect new members since they do not have their memberId and
@ -1020,14 +1119,18 @@ class GroupCoordinator(val brokerId: Int,
@@ -1020,14 +1119,18 @@ class GroupCoordinator(val brokerId: Int,
// timeout during a long rebalance ) , they may simply retry which will lead to a lot of defunct
// members in the rebalance . To prevent this going on indefinitely , we timeout JoinGroup requests
// for new members . If the new member is still there , we expect it to retry .
// 设置下次心跳超期时间
completeAndScheduleNextExpiration ( group , member , NewMemberJoinTimeoutMs )
if ( member . isStaticMember ) {
info ( s" Adding new static member $groupInstanceId to group ${ group . groupId } with member id $memberId . " )
// 静态成员加入组
group . addStaticMember ( groupInstanceId , memberId )
} else {
// 当前已经加入组 , 则从 `待决成员` 中删除
group . removePendingMember ( memberId )
}
// 准备rebalance
maybePrepareRebalance ( group , s" Adding new member $memberId with group instance id $groupInstanceId " )
}
@ -1113,12 +1216,15 @@ class GroupCoordinator(val brokerId: Int,
@@ -1113,12 +1216,15 @@ class GroupCoordinator(val brokerId: Int,
protocols : List [ ( String , Array [ Byte ] ) ] ,
reason : String ,
callback : JoinCallback ) : Unit = {
// 更新组成员信息 ; 调用 GroupMetadata 的 updateMember 方法来更新消费者组成员 ;
group . updateMember ( member , protocols , callback )
// 这一步的核心思想 , 是将消费者组状态变更到 PreparingRebalance , 然后创建 DelayedJoin 对象 , 并交由 Purgatory , 等待延时处理加入组操作
maybePrepareRebalance ( group , reason )
}
private def maybePrepareRebalance ( group : GroupMetadata , reason : String ) : Unit = {
group . inLock {
// 状态属于三者之一 Stable , CompletingRebalance , Empty
if ( group . canRebalance )
prepareRebalance ( group , reason )
}
@ -1126,6 +1232,7 @@ class GroupCoordinator(val brokerId: Int,
@@ -1126,6 +1232,7 @@ class GroupCoordinator(val brokerId: Int,
// package private for testing
private [ group ] def prepareRebalance ( group : GroupMetadata , reason : String ) : Unit = {
// 如果当前CompletingRebalance , 清空分配方案并返回REBALANCE_IN_PROGRESS
// if any members are awaiting sync , cancel their request and have them rejoin
if ( group . is ( CompletingRebalance ) )
resetAndPropagateAssignmentError ( group , Errors . REBALANCE_IN_PROGRESS )
@ -1133,6 +1240,8 @@ class GroupCoordinator(val brokerId: Int,
@@ -1133,6 +1240,8 @@ class GroupCoordinator(val brokerId: Int,
// if a sync expiration is pending , cancel it .
removeSyncExpiration ( group )
// Empty group: build an InitialDelayedJoin
// any other state: build a regular DelayedJoin with the group's rebalance timeout
val delayedRebalance = if ( group . is ( Empty ) )
new InitialDelayedJoin ( this ,
rebalancePurgatory ,
@ -1143,12 +1252,14 @@ class GroupCoordinator(val brokerId: Int,
@@ -1143,12 +1252,14 @@ class GroupCoordinator(val brokerId: Int,
else
new DelayedJoin ( this , group , group . rebalanceTimeoutMs )
// 状态更新到PreparingRebalance
group . transitionTo ( PreparingRebalance )
info ( s" Preparing to rebalance group ${ group . groupId } in state ${ group . currentState } with old generation " +
s" ${ group . generationId } ( ${ Topic . GROUP_METADATA_TOPIC_NAME } - ${ partitionFor ( group . groupId ) } ) (reason: $reason ) " )
val groupKey = GroupJoinKey ( group . groupId )
// 尝试完成加入组操作 , 如果没有完成 , 则设置监听 , 延时进行加入
rebalancePurgatory . tryCompleteElseWatch ( delayedRebalance , Seq ( groupKey ) )
}
@ -1184,20 +1295,25 @@ class GroupCoordinator(val brokerId: Int,
@@ -1184,20 +1295,25 @@ class GroupCoordinator(val brokerId: Int,
def onCompleteJoin ( group : GroupMetadata ) : Unit = {
group . inLock {
// dynamic (non-static) members that have not rejoined the group yet
val notYetRejoinedDynamicMembers = group . notYetRejoinedMembers . filterNot ( _ . _2 . isStaticMember )
if ( notYetRejoinedDynamicMembers . nonEmpty ) {
info ( s" Group ${ group . groupId } removed dynamic members " +
s" who haven't joined: ${ notYetRejoinedDynamicMembers . keySet } " )
// 清理未加入组的消费者 , 取消heartbeat
notYetRejoinedDynamicMembers . values . foreach { failedMember =>
removeHeartbeatForLeavingMember ( group , failedMember )
group . remove ( failedMember . memberId )
}
}
// 组的状态 == dead
if ( group . is ( Dead ) ) {
info ( s" Group ${ group . groupId } is dead, skipping rebalance stage " )
} else if ( ! group . maybeElectNewJoinedLeader ( ) && group . allMembers . nonEmpty ) {
}
// 如果组成员不为空 , 且还未选出Leader成员
else if ( ! group . maybeElectNewJoinedLeader ( ) && group . allMembers . nonEmpty ) {
// If all members are not rejoining , we will postpone the completion
// of rebalance preparing stage , and send out another delayed operation
// until session timeout removes all the non - responsive members .
@ -1205,8 +1321,11 @@ class GroupCoordinator(val brokerId: Int,
@@ -1205,8 +1321,11 @@ class GroupCoordinator(val brokerId: Int,
rebalancePurgatory . tryCompleteElseWatch (
new DelayedJoin ( this , group , group . rebalanceTimeoutMs ) ,
Seq ( GroupJoinKey ( group . groupId ) ) )
} else {
}
else {
group . initNextGeneration ( )
// 组为空
if ( group . is ( Empty ) ) {
info ( s" Group ${ group . groupId } with generation ${ group . generationId } is now empty " +
s" ( ${ Topic . GROUP_METADATA_TOPIC_NAME } - ${ partitionFor ( group . groupId ) } ) " )
@ -1219,13 +1338,17 @@ class GroupCoordinator(val brokerId: Int,
@@ -1219,13 +1338,17 @@ class GroupCoordinator(val brokerId: Int,
warn ( s" Failed to write empty metadata for group ${ group . groupId } : ${ error . message } " )
}
} )
} else {
}
// 组不为空
else {
info ( s" Stabilized group ${ group . groupId } generation ${ group . generationId } " +
s" ( ${ Topic . GROUP_METADATA_TOPIC_NAME } - ${ partitionFor ( group . groupId ) } ) with ${ group . size } members " )
// 遍历所有组员
// trigger the awaiting join group response callback for all the members after rebalancing
for ( member <- group . allMemberMetadata ) {
val joinResult = JoinGroupResult (
// 重要1 , members信息仅发送给leader
members = if ( group . isLeader ( member . memberId ) ) {
group . currentMemberMetadata
} else {
@ -1233,18 +1356,23 @@ class GroupCoordinator(val brokerId: Int,
@@ -1233,18 +1356,23 @@ class GroupCoordinator(val brokerId: Int,
} ,
memberId = member . memberId ,
generationId = group . generationId ,
// 重要2 , 确定分区分配策略
// 服务端只是帮忙确定了整个组的分区分配策略 , 而分配消费分区的任务则交给了 Leader 消费者 。
protocolType = group . protocolType ,
protocolName = group . protocolName ,
leaderId = group . leaderOrNull ,
error = Errors . NONE )
// 调用回调函数返回
group . maybeInvokeJoinCallback ( member , joinResult )
// 完成当前心跳任务并设置下一个
completeAndScheduleNextHeartbeatExpiration ( group , member )
// 标记该成员为非新成员
member . isNew = false
// pendingSyncMembers新增member
group . addPendingSyncMember ( member . memberId )
}
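// schedule the pending-sync task for the members registered above via addPendingSyncMember
// (presumably a deadline for their SyncGroup requests; confirm in schedulePendingSync)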
schedulePendingSync ( group )
}
}