@@ -18,6 +18,7 @@ package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.annotation.InterfaceStability;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.processor.StateStoreSupplier;
@@ -80,9 +81,52 @@ public interface KGroupedTable<K, V> {
* alphanumerics, '.', '_' and '-'. If {@code null} this is the equivalent of {@link KGroupedTable#count()}.
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
* @deprecated use {@link #count(Materialized)}
*/
@Deprecated
KTable<K, Long> count(final String queryableStoreName);
/**
* Count the number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped}
* to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name provided via {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size} and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
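* For example, a smaller cache and a shorter commit interval both increase the update rate; the following is a
* minimal configuration sketch (a cache size of {@code 0} disables record caching entirely):
* <pre>{@code
* Properties props = new Properties();
* props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); // forward every single update downstream
* props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); // flush caches at least once per second
* }</pre>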
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(String, QueryableStoreType) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
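* One possible approach (a sketch only; the RPC transport itself is application-specific) is to locate the
* instance hosting a given key via {@code KafkaStreams#metadataForKey(...)}:
* <pre>{@code
* StreamsMetadata metadata = streams.metadataForKey(queryableStoreName, key, Serdes.String().serializer());
* // issue an application-specific RPC call against metadata.host() and metadata.port()
* }</pre>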
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${queryableStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "queryableStoreName" is the store name
* provided via {@link Materialized}, and "-changelog" is a fixed suffix.
* The store name must be a valid Kafka topic name and cannot contain characters other than ASCII alphanumerics,
* '.', '_' and '-'.
* You can retrieve all generated internal topic names via {@link KafkaStreams#toString()}.
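* <p>
* A minimal usage sketch (the store name "counts-store" is illustrative, not part of this API):
* <pre>{@code
* KTable<String, Long> counts = groupedTable.count(
*     Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));
* }</pre>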
*
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Count the number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped}
* to the same key into a new instance of {@link KTable}.
@@ -148,7 +192,9 @@ public interface KGroupedTable<K, V> {
* @param storeSupplier user defined state store supplier. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
* @deprecated use {@link #count(Materialized)}
*/
@Deprecated
KTable<K, Long> count(final StateStoreSupplier<KeyValueStore> storeSupplier);
/**
@@ -218,11 +264,83 @@ public interface KGroupedTable<K, V> {
* '.', '_' and '-'. If {@code null} this is the equivalent of {@link KGroupedTable#reduce(Reducer, Reducer)}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
* @deprecated use {@link #reduce(Reducer, Reducer, Materialized)}
*/
@Deprecated
KTable<K, V> reduce(final Reducer<V> adder,
                    final Reducer<V> subtractor,
                    final String queryableStoreName);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (cf. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name provided via {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two-step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
* aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
* argument) by "removing" the "replaced" record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Reducer, Materialized)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
*   public Integer apply(Integer currentAgg, Integer newValue) {
*     return currentAgg + newValue;
*   }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
*   public Integer apply(Integer currentAgg, Integer oldValue) {
*     return currentAgg - oldValue;
*   }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size} and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(String, QueryableStoreType) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${queryableStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "queryableStoreName" is the store name
* provided via {@link Materialized}, and "-changelog" is a fixed suffix.
* The store name must be a valid Kafka topic name and cannot contain characters other than ASCII alphanumerics,
* '.', '_' and '-'.
* You can retrieve all generated internal topic names via {@link KafkaStreams#toString()}.
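* <p>
* A minimal usage sketch reusing the adder and subtractor from above (the store name "sum-store" is illustrative):
* <pre>{@code
* KTable<String, Integer> sums = groupedTable.reduce(
*     new SumAdder(),
*     new SumSubtractor(),
*     Materialized.<String, Integer, KeyValueStore<Bytes, byte[]>>as("sum-store"));
* }</pre>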
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
* @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
                    final Reducer<V> subtractor,
                    final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
@@ -344,7 +462,9 @@ public interface KGroupedTable<K, V> {
* @param storeSupplier user defined state store supplier. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
* @deprecated use {@link #reduce(Reducer, Reducer, Materialized)}
*/
@Deprecated
KTable<K, V> reduce(final Reducer<V> adder,
                    final Reducer<V> subtractor,
                    final StateStoreSupplier<KeyValueStore> storeSupplier);
@@ -427,12 +547,94 @@ public interface KGroupedTable<K, V> {
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
* @deprecated use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}
*/
@Deprecated
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
                             final Aggregator<? super K, ? super V, VR> adder,
                             final Aggregator<? super K, ? super V, VR> subtractor,
                             final String queryableStoreName);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as
* it, for example, allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name provided via {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two-step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate
* functions like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
* public class SumInitializer implements Initializer<Long> {
*   public Long apply() {
*     return 0L;
*   }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
*   public Long apply(String key, Integer newValue, Long aggregate) {
*     return aggregate + newValue;
*   }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
*   public Long apply(String key, Integer oldValue, Long aggregate) {
*     return aggregate - oldValue;
*   }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size} and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(String, QueryableStoreType) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${queryableStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "queryableStoreName" is the store name
* provided via {@link Materialized}, and "-changelog" is a fixed suffix.
* You can retrieve all generated internal topic names via {@link KafkaStreams#toString()}.
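* <p>
* A minimal usage sketch reusing the initializer, adder, and subtractor from above (the store name and the
* explicit value serde are illustrative):
* <pre>{@code
* KTable<String, Long> sums = groupedTable.aggregate(
*     new SumInitializer(),
*     new SumAdder(),
*     new SumSubtractor(),
*     Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("sum-store")
*         .withValueSerde(Serdes.Long()));
* }</pre>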
*
* @param initializer an {@link Initializer} that provides an initial aggregate result value
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
* @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
                             final Aggregator<? super K, ? super V, VR> adder,
                             final Aggregator<? super K, ? super V, VR> subtractor,
                             final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
@@ -582,7 +784,9 @@ public interface KGroupedTable<K, V> {
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
* @deprecated use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}
*/
@Deprecated
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
                             final Aggregator<? super K, ? super V, VR> adder,
                             final Aggregator<? super K, ? super V, VR> subtractor,