Mirror of Apache Kafka
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

103 lines
4.9 KiB

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.tests.test import Test
from ducktape.mark.resource import cluster
from ducktape.mark import parametrize, matrix
from kafkatest.tests.kafka_test import KafkaTest
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
from kafkatest.services.zookeeper import ZookeeperService
from kafkatest.services.kafka import KafkaService
from kafkatest.version import DEV_BRANCH
class StreamsSimpleBenchmarkTest(Test):
    """
    Simple benchmark of Kafka Streams.

    For every (test, scale) combination the test starts one ZooKeeper node and
    a Kafka cluster of ``scale`` brokers, runs a single load driver, then runs
    ``scale`` benchmark drivers and gathers the data each of them collected.
    """

    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context)
        # Plain integer literal: the Python 2-only ``L`` suffix is a
        # SyntaxError on Python 3, and the value is the same on Python 2.
        self.num_records = 10000000
        self.replication = 1
        self.num_threads = 1

    @cluster(num_nodes=9)
    @matrix(test=["produce", "consume", "count", "processstream",
                  "processstreamwithsink", "processstreamwithstatestore",
                  "processstreamwithcachedstatestore", "kstreamktablejoin",
                  "kstreamkstreamjoin", "ktablektablejoin", "yahoo"],
            scale=[1, 3])
    def test_simple_benchmark(self, test, scale):
        """
        Run simple Kafka Streams benchmark

        :param test: name of the benchmark, passed through to
            StreamsSimpleBenchmarkService unchanged.
        :param scale: number of Kafka brokers, number of partitions of the
            scaled topics, and number of benchmark drivers run in parallel.
        :return: dict holding every entry collected from every driver; each
            key is the driver's own key with the driver index appended.
        """
        # NOTE(review): this list has one more slot than is filled below; the
        # last entry stays None. Kept as-is in case anything inspects it.
        self.driver = [None] * (scale + 1)

        #############
        # SETUP PHASE
        #############
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()
        self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={
            'simpleBenchmarkSourceTopic': {'partitions': scale, 'replication-factor': self.replication},
            'countTopic': {'partitions': scale, 'replication-factor': self.replication},
            'simpleBenchmarkSinkTopic': {'partitions': scale, 'replication-factor': self.replication},
            'joinSourceTopic1KStreamKStream': {'partitions': scale, 'replication-factor': self.replication},
            'joinSourceTopic2KStreamKStream': {'partitions': scale, 'replication-factor': self.replication},
            'joinSourceTopic1KStreamKTable': {'partitions': scale, 'replication-factor': self.replication},
            'joinSourceTopic2KStreamKTable': {'partitions': scale, 'replication-factor': self.replication},
            'joinSourceTopic1KTableKTable': {'partitions': scale, 'replication-factor': self.replication},
            'joinSourceTopic2KTableKTable': {'partitions': scale, 'replication-factor': self.replication},
            'yahooCampaigns': {'partitions': 20, 'replication-factor': self.replication},
            'yahooEvents': {'partitions': 20, 'replication-factor': self.replication}
        })
        self.kafka.log_level = "INFO"
        self.kafka.start()

        ################
        # LOAD PHASE
        ################
        # NOTE(review): the "true"/"false" argument presumably switches the
        # service between load and run mode -- confirm against
        # StreamsSimpleBenchmarkService.
        self.load_driver = StreamsSimpleBenchmarkService(self.test_context, self.kafka,
                                                         self.num_records * scale, "true", test,
                                                         self.num_threads)
        self.load_driver.start()
        self.load_driver.wait()
        self.load_driver.stop()

        ################
        # RUN PHASE
        ################
        # Floor division keeps the per-driver record count an integer on
        # Python 3 as well (``/`` would hand the service a float there).
        records_per_driver = self.num_records // scale
        for num in range(scale):
            self.driver[num] = StreamsSimpleBenchmarkService(self.test_context, self.kafka,
                                                             records_per_driver, "false", test,
                                                             self.num_threads)
            # All drivers are started before any of them is waited on.
            self.driver[num].start()

        #######################
        # STOP + COLLECT PHASE
        #######################
        final = {}
        for num in range(scale):
            driver = self.driver[num]
            driver.wait()
            driver.stop()
            node = driver.node
            # allow_fail=False: a failing grep for "Performance" in the
            # driver's stdout file is not tolerated.
            node.account.ssh("grep Performance %s" % driver.STDOUT_FILE, allow_fail=False)
            collected = driver.collect_data(node, "")
            # Suffix each key with the driver index so entries from different
            # drivers do not overwrite each other.
            for key in collected:
                final[key + str(num)] = collected[key]
        return final