Mirror of Apache Kafka
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

206 lines
5.3 KiB

#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if [ $# -lt 1 ];
then
echo "USAGE: $0 [-daemon] [-name servicename] [-loggc] classname [opts]"
exit 1
fi
base_dir=$(dirname $0)/..
if [ -z "$SCALA_VERSION" ]; then
SCALA_VERSION=2.10.6
fi
if [ -z "$SCALA_BINARY_VERSION" ]; then
SCALA_BINARY_VERSION=2.10
fi
# run ./gradlew copyDependantLibs to get all dependant jars in a local dir
shopt -s nullglob
for dir in $base_dir/core/build/dependant-libs-${SCALA_VERSION}*;
do
CLASSPATH=$CLASSPATH:$dir/*
done
for file in $base_dir/examples/build/libs//kafka-examples*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
for file in $base_dir/clients/build/libs/kafka-clients*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
for file in $base_dir/streams/build/libs/kafka-streams*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
for file in $base_dir/streams/examples/build/libs/kafka-streams-examples*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
for file in $base_dir/streams/build/dependant-libs-${SCALA_VERSION}/rocksdb*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
KAFKA-2276; KIP-25 initial patch Initial patch for KIP-25 Note that to install ducktape, do *not* use pip to install ducktape. Instead: ``` $ git clone gitgithub.com:confluentinc/ducktape.git $ cd ducktape $ python setup.py install ``` Author: Geoff Anderson <geoff@confluent.io> Author: Geoff <granders@gmail.com> Author: Liquan Pei <liquanpei@gmail.com> Reviewers: Ewen, Gwen, Jun, Guozhang Closes #70 from granders/KAFKA-2276 and squashes the following commits: a62fb6c [Geoff Anderson] fixed checkstyle errors a70f0f8 [Geoff Anderson] Merged in upstream trunk. 8b62019 [Geoff Anderson] Merged in upstream trunk. 47b7b64 [Geoff Anderson] Created separate tools jar so that the clients package does not pull in dependencies on the Jackson JSON tools or argparse4j. a9e6a14 [Geoff Anderson] Merged in upstream changes d18db7b [Geoff Anderson] fixed :rat errors (needed to add licenses) 321fdf8 [Geoff Anderson] Ignore tests/ and vagrant/ directories when running rat build task 795fc75 [Geoff Anderson] Merged in changes from upstream trunk. 1d93f06 [Geoff Anderson] Updated provisioning to use java 7 in light of KAFKA-2316 2ea4e29 [Geoff Anderson] Tweaked README, changed default log collection behavior on VerifiableProducer 0eb6fdc [Geoff Anderson] Merged in system-tests 69dd7be [Geoff Anderson] Merged in trunk 4034dd6 [Geoff Anderson] Merged in upstream trunk ede6450 [Geoff] Merge pull request #4 from confluentinc/move_muckrake 7751545 [Geoff Anderson] Corrected license headers e6d532f [Geoff Anderson] java 7 -> java 6 8c61e2d [Geoff Anderson] Reverted jdk back to 6 f14c507 [Geoff Anderson] Removed mode = "test" from Vagrantfile and Vagrantfile.local examples. Updated testing README to clarify aws setup. 98b7253 [Geoff Anderson] Updated consumer tests to pre-populate kafka logs e6a41f1 [Geoff Anderson] removed stray println b15b24f [Geoff Anderson] leftover KafkaBenchmark in super call 0f75187 [Geoff Anderson] Rmoved stray allow_fail. kafka_benchmark_test -> benchmark_test f469f84 [Geoff Anderson] Tweaked readme, added example Vagrantfile.local 3d73857 [Geoff Anderson] Merged downstream changes 42dcdb1 [Geoff Anderson] Tweaked behavior of stop_node, clean_node to generally fail fast 7f7c3e0 [Geoff Anderson] Updated setup.py for kafkatest c60125c [Geoff Anderson] TestEndToEndLatency -> EndToEndLatency 4f476fe [Geoff Anderson] Moved aws scripts to vagrant directory 5af88fc [Geoff Anderson] Updated README to include aws quickstart e5edf03 [Geoff Anderson] Updated example aws Vagrantfile.local 96533c3 [Geoff] Update aws-access-keys-commands 25a413d [Geoff] Update aws-example-Vagrantfile.local 884b20e [Geoff Anderson] Moved a bunch of files to kafkatest directory fc7c81c [Geoff Anderson] added setup.py 632be12 [Geoff] Merge pull request #3 from confluentinc/verbose-client 51a94fd [Geoff Anderson] Use argparse4j instead of joptsimple. ThroughputThrottler now has more intuitive behavior when targetThroughput is 0. a80a428 [Geoff Anderson] Added shell program for VerifiableProducer. d586fb0 [Geoff Anderson] Updated comments to reflect that throttler is not message-specific 6842ed1 [Geoff Anderson] left out a file from last commit 1228eef [Geoff Anderson] Renamed throttler 9100417 [Geoff Anderson] Updated command-line options for VerifiableProducer. Extracted throughput logic to make it reusable. 0a5de8e [Geoff Anderson] Fixed checkstyle errors. Changed name to VerifiableProducer. Added synchronization for thread safety on println statements. 475423b [Geoff Anderson] Convert class to string before adding to json object. bc009f2 [Geoff Anderson] Got rid of VerboseProducer in core (moved to clients) c0526fe [Geoff Anderson] Updates per review comments. 8b4b1f2 [Geoff Anderson] Minor updates to VerboseProducer 2777712 [Geoff Anderson] Added some metadata to producer output. da94b8c [Geoff Anderson] Added number of messages option. 07cd1c6 [Geoff Anderson] Added simple producer which prints status of produced messages to stdout. a278988 [Geoff Anderson] fixed typos f1914c3 [Liquan Pei] Merge pull request #2 from confluentinc/system_tests 81e4156 [Liquan Pei] Bootstrap Kafka system tests
9 years ago
for file in $base_dir/tools/build/libs/kafka-tools*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
for dir in $base_dir/tools/build/dependant-libs-${SCALA_VERSION}*;
KAFKA-2276; KIP-25 initial patch Initial patch for KIP-25 Note that to install ducktape, do *not* use pip to install ducktape. Instead: ``` $ git clone gitgithub.com:confluentinc/ducktape.git $ cd ducktape $ python setup.py install ``` Author: Geoff Anderson <geoff@confluent.io> Author: Geoff <granders@gmail.com> Author: Liquan Pei <liquanpei@gmail.com> Reviewers: Ewen, Gwen, Jun, Guozhang Closes #70 from granders/KAFKA-2276 and squashes the following commits: a62fb6c [Geoff Anderson] fixed checkstyle errors a70f0f8 [Geoff Anderson] Merged in upstream trunk. 8b62019 [Geoff Anderson] Merged in upstream trunk. 47b7b64 [Geoff Anderson] Created separate tools jar so that the clients package does not pull in dependencies on the Jackson JSON tools or argparse4j. a9e6a14 [Geoff Anderson] Merged in upstream changes d18db7b [Geoff Anderson] fixed :rat errors (needed to add licenses) 321fdf8 [Geoff Anderson] Ignore tests/ and vagrant/ directories when running rat build task 795fc75 [Geoff Anderson] Merged in changes from upstream trunk. 1d93f06 [Geoff Anderson] Updated provisioning to use java 7 in light of KAFKA-2316 2ea4e29 [Geoff Anderson] Tweaked README, changed default log collection behavior on VerifiableProducer 0eb6fdc [Geoff Anderson] Merged in system-tests 69dd7be [Geoff Anderson] Merged in trunk 4034dd6 [Geoff Anderson] Merged in upstream trunk ede6450 [Geoff] Merge pull request #4 from confluentinc/move_muckrake 7751545 [Geoff Anderson] Corrected license headers e6d532f [Geoff Anderson] java 7 -> java 6 8c61e2d [Geoff Anderson] Reverted jdk back to 6 f14c507 [Geoff Anderson] Removed mode = "test" from Vagrantfile and Vagrantfile.local examples. Updated testing README to clarify aws setup. 98b7253 [Geoff Anderson] Updated consumer tests to pre-populate kafka logs e6a41f1 [Geoff Anderson] removed stray println b15b24f [Geoff Anderson] leftover KafkaBenchmark in super call 0f75187 [Geoff Anderson] Rmoved stray allow_fail. kafka_benchmark_test -> benchmark_test f469f84 [Geoff Anderson] Tweaked readme, added example Vagrantfile.local 3d73857 [Geoff Anderson] Merged downstream changes 42dcdb1 [Geoff Anderson] Tweaked behavior of stop_node, clean_node to generally fail fast 7f7c3e0 [Geoff Anderson] Updated setup.py for kafkatest c60125c [Geoff Anderson] TestEndToEndLatency -> EndToEndLatency 4f476fe [Geoff Anderson] Moved aws scripts to vagrant directory 5af88fc [Geoff Anderson] Updated README to include aws quickstart e5edf03 [Geoff Anderson] Updated example aws Vagrantfile.local 96533c3 [Geoff] Update aws-access-keys-commands 25a413d [Geoff] Update aws-example-Vagrantfile.local 884b20e [Geoff Anderson] Moved a bunch of files to kafkatest directory fc7c81c [Geoff Anderson] added setup.py 632be12 [Geoff] Merge pull request #3 from confluentinc/verbose-client 51a94fd [Geoff Anderson] Use argparse4j instead of joptsimple. ThroughputThrottler now has more intuitive behavior when targetThroughput is 0. a80a428 [Geoff Anderson] Added shell program for VerifiableProducer. d586fb0 [Geoff Anderson] Updated comments to reflect that throttler is not message-specific 6842ed1 [Geoff Anderson] left out a file from last commit 1228eef [Geoff Anderson] Renamed throttler 9100417 [Geoff Anderson] Updated command-line options for VerifiableProducer. Extracted throughput logic to make it reusable. 0a5de8e [Geoff Anderson] Fixed checkstyle errors. Changed name to VerifiableProducer. Added synchronization for thread safety on println statements. 475423b [Geoff Anderson] Convert class to string before adding to json object. bc009f2 [Geoff Anderson] Got rid of VerboseProducer in core (moved to clients) c0526fe [Geoff Anderson] Updates per review comments. 8b4b1f2 [Geoff Anderson] Minor updates to VerboseProducer 2777712 [Geoff Anderson] Added some metadata to producer output. da94b8c [Geoff Anderson] Added number of messages option. 07cd1c6 [Geoff Anderson] Added simple producer which prints status of produced messages to stdout. a278988 [Geoff Anderson] fixed typos f1914c3 [Liquan Pei] Merge pull request #2 from confluentinc/system_tests 81e4156 [Liquan Pei] Bootstrap Kafka system tests
9 years ago
do
CLASSPATH=$CLASSPATH:$dir/*
KAFKA-2276; KIP-25 initial patch Initial patch for KIP-25 Note that to install ducktape, do *not* use pip to install ducktape. Instead: ``` $ git clone gitgithub.com:confluentinc/ducktape.git $ cd ducktape $ python setup.py install ``` Author: Geoff Anderson <geoff@confluent.io> Author: Geoff <granders@gmail.com> Author: Liquan Pei <liquanpei@gmail.com> Reviewers: Ewen, Gwen, Jun, Guozhang Closes #70 from granders/KAFKA-2276 and squashes the following commits: a62fb6c [Geoff Anderson] fixed checkstyle errors a70f0f8 [Geoff Anderson] Merged in upstream trunk. 8b62019 [Geoff Anderson] Merged in upstream trunk. 47b7b64 [Geoff Anderson] Created separate tools jar so that the clients package does not pull in dependencies on the Jackson JSON tools or argparse4j. a9e6a14 [Geoff Anderson] Merged in upstream changes d18db7b [Geoff Anderson] fixed :rat errors (needed to add licenses) 321fdf8 [Geoff Anderson] Ignore tests/ and vagrant/ directories when running rat build task 795fc75 [Geoff Anderson] Merged in changes from upstream trunk. 1d93f06 [Geoff Anderson] Updated provisioning to use java 7 in light of KAFKA-2316 2ea4e29 [Geoff Anderson] Tweaked README, changed default log collection behavior on VerifiableProducer 0eb6fdc [Geoff Anderson] Merged in system-tests 69dd7be [Geoff Anderson] Merged in trunk 4034dd6 [Geoff Anderson] Merged in upstream trunk ede6450 [Geoff] Merge pull request #4 from confluentinc/move_muckrake 7751545 [Geoff Anderson] Corrected license headers e6d532f [Geoff Anderson] java 7 -> java 6 8c61e2d [Geoff Anderson] Reverted jdk back to 6 f14c507 [Geoff Anderson] Removed mode = "test" from Vagrantfile and Vagrantfile.local examples. Updated testing README to clarify aws setup. 98b7253 [Geoff Anderson] Updated consumer tests to pre-populate kafka logs e6a41f1 [Geoff Anderson] removed stray println b15b24f [Geoff Anderson] leftover KafkaBenchmark in super call 0f75187 [Geoff Anderson] Rmoved stray allow_fail. kafka_benchmark_test -> benchmark_test f469f84 [Geoff Anderson] Tweaked readme, added example Vagrantfile.local 3d73857 [Geoff Anderson] Merged downstream changes 42dcdb1 [Geoff Anderson] Tweaked behavior of stop_node, clean_node to generally fail fast 7f7c3e0 [Geoff Anderson] Updated setup.py for kafkatest c60125c [Geoff Anderson] TestEndToEndLatency -> EndToEndLatency 4f476fe [Geoff Anderson] Moved aws scripts to vagrant directory 5af88fc [Geoff Anderson] Updated README to include aws quickstart e5edf03 [Geoff Anderson] Updated example aws Vagrantfile.local 96533c3 [Geoff] Update aws-access-keys-commands 25a413d [Geoff] Update aws-example-Vagrantfile.local 884b20e [Geoff Anderson] Moved a bunch of files to kafkatest directory fc7c81c [Geoff Anderson] added setup.py 632be12 [Geoff] Merge pull request #3 from confluentinc/verbose-client 51a94fd [Geoff Anderson] Use argparse4j instead of joptsimple. ThroughputThrottler now has more intuitive behavior when targetThroughput is 0. a80a428 [Geoff Anderson] Added shell program for VerifiableProducer. d586fb0 [Geoff Anderson] Updated comments to reflect that throttler is not message-specific 6842ed1 [Geoff Anderson] left out a file from last commit 1228eef [Geoff Anderson] Renamed throttler 9100417 [Geoff Anderson] Updated command-line options for VerifiableProducer. Extracted throughput logic to make it reusable. 0a5de8e [Geoff Anderson] Fixed checkstyle errors. Changed name to VerifiableProducer. Added synchronization for thread safety on println statements. 475423b [Geoff Anderson] Convert class to string before adding to json object. bc009f2 [Geoff Anderson] Got rid of VerboseProducer in core (moved to clients) c0526fe [Geoff Anderson] Updates per review comments. 8b4b1f2 [Geoff Anderson] Minor updates to VerboseProducer 2777712 [Geoff Anderson] Added some metadata to producer output. da94b8c [Geoff Anderson] Added number of messages option. 07cd1c6 [Geoff Anderson] Added simple producer which prints status of produced messages to stdout. a278988 [Geoff Anderson] fixed typos f1914c3 [Liquan Pei] Merge pull request #2 from confluentinc/system_tests 81e4156 [Liquan Pei] Bootstrap Kafka system tests
9 years ago
done
for cc_pkg in "api" "runtime" "file" "json" "tools"
KAFKA-2366; Initial patch for Copycat This is an initial patch implementing the basics of Copycat for KIP-26. The intent here is to start a review of the key pieces of the core API and get a reasonably functional, baseline, non-distributed implementation of Copycat in place to get things rolling. The current patch has a number of known issues that need to be addressed before a final version: * Some build-related issues. Specifically, requires some locally-installed dependencies (see below), ignores checkstyle for the runtime data library because it's lifted from Avro currently and likely won't last in its current form, and some Gradle task dependencies aren't quite right because I haven't gotten rid of the dependency on `core` (which should now be an easy patch since new consumer groups are in a much better state). * This patch currently depends on some Confluent trunk code because I prototyped with our Avro serializers w/ schema-registry support. We need to figure out what we want to provide as an example built-in set of serializers. Unlike core Kafka where we could ignore the issue, providing only ByteArray or String serializers, this is pretty central to how Copycat works. * This patch uses a hacked up version of Avro as its runtime data format. Not sure if we want to go through the entire API discussion just to get some basic code committed, so I filed KAFKA-2367 to handle that separately. The core connector APIs and the runtime data APIs are entirely orthogonal. * This patch needs some updates to get aligned with recent new consumer changes (specifically, I'm aware of the ConcurrentModificationException issue on exit). More generally, the new consumer is in flux but Copycat depends on it, so there are likely to be some negative interactions. * The layout feels a bit awkward to me right now because I ported it from a Maven layout. We don't have nearly the same level of granularity in Kafka currently (core and clients, plus the mostly ignored examples, log4j-appender, and a couple of contribs). We might want to reorganize, although keeping data+api separate from runtime and connector plugins is useful for minimizing dependencies. * There are a variety of other things (e.g., I'm not happy with the exception hierarchy/how they are currently handled, TopicPartition doesn't really need to be duplicated unless we want Copycat entirely isolated from the Kafka APIs, etc), but I expect those we'll cover in the review. Before commenting on the patch, it's probably worth reviewing https://issues.apache.org/jira/browse/KAFKA-2365 and https://issues.apache.org/jira/browse/KAFKA-2366 to get an idea of what I had in mind for a) what we ultimately want with all the Copycat patches and b) what we aim to cover in this initial patch. My hope is that we can use a WIP patch (after the current obvious deficiencies are addressed) while recognizing that we want to make iterative progress with a bunch of subsequent PRs. Author: Ewen Cheslack-Postava <me@ewencp.org> Reviewers: Ismael Juma, Gwen Shapira Closes #99 from ewencp/copycat and squashes the following commits: a3a47a6 [Ewen Cheslack-Postava] Simplify Copycat exceptions, make them a subclass of KafkaException. 8c108b0 [Ewen Cheslack-Postava] Rename Coordinator to Herder to avoid confusion with the consumer coordinator. 7bf8075 [Ewen Cheslack-Postava] Make Copycat CLI speific to standalone mode, clean up some config and get rid of config storage in standalone mode. 656a003 [Ewen Cheslack-Postava] Clarify and expand the explanation of the Copycat Coordinator interface. c0e5fdc [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat 0fa7a36 [Ewen Cheslack-Postava] Mark Copycat classes as unstable and reduce visibility of some classes where possible. d55d31e [Ewen Cheslack-Postava] Reorganize Copycat code to put it all under one top-level directory. b29cb2c [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat d713a21 [Ewen Cheslack-Postava] Address Gwen's review comments. 6787a85 [Ewen Cheslack-Postava] Make Converter generic to match serializers since some serialization formats do not require a base class of Object; update many other classes to have generic key and value class type parameters to match this change. b194c73 [Ewen Cheslack-Postava] Split Copycat converter option into two options for key and value. 0b5a1a0 [Ewen Cheslack-Postava] Normalize naming to use partition for both source and Kafka, adjusting naming in CopycatRecord classes to clearly differentiate. e345142 [Ewen Cheslack-Postava] Remove Copycat reflection utils, use existing Utils and ConfigDef functionality from clients package. be5c387 [Ewen Cheslack-Postava] Minor cleanup 122423e [Ewen Cheslack-Postava] Style cleanup 6ba87de [Ewen Cheslack-Postava] Remove most of the Avro-based mock runtime data API, only preserving enough schema functionality to support basic primitive types for an initial patch. 4674d13 [Ewen Cheslack-Postava] Address review comments, clean up some code styling. 25b5739 [Ewen Cheslack-Postava] Fix sink task offset commit concurrency issue by moving it to the worker thread and waking up the consumer to ensure it exits promptly. 0aefe21 [Ewen Cheslack-Postava] Add log4j settings for Copycat. 220e42d [Ewen Cheslack-Postava] Replace Avro serializer with JSON serializer. 1243a7c [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat 5a618c6 [Ewen Cheslack-Postava] Remove offset serializers, instead reusing the existing serializers and removing schema projection support. e849e10 [Ewen Cheslack-Postava] Remove duplicated TopicPartition implementation. dec1379 [Ewen Cheslack-Postava] Switch to using new consumer coordinator instead of manually assigning partitions. Remove dependency of copycat-runtime on core. 4a9b4f3 [Ewen Cheslack-Postava] Add some helpful Copycat-specific build and test targets that cover all Copycat packages. 31cd1ca [Ewen Cheslack-Postava] Add CLI tools for Copycat. e14942c [Ewen Cheslack-Postava] Add Copycat file connector. 0233456 [Ewen Cheslack-Postava] Add copycat-avro and copycat-runtime 11981d2 [Ewen Cheslack-Postava] Add copycat-data and copycat-api
9 years ago
do
for file in $base_dir/connect/${cc_pkg}/build/libs/connect-${cc_pkg}*.jar;
KAFKA-2366; Initial patch for Copycat This is an initial patch implementing the basics of Copycat for KIP-26. The intent here is to start a review of the key pieces of the core API and get a reasonably functional, baseline, non-distributed implementation of Copycat in place to get things rolling. The current patch has a number of known issues that need to be addressed before a final version: * Some build-related issues. Specifically, requires some locally-installed dependencies (see below), ignores checkstyle for the runtime data library because it's lifted from Avro currently and likely won't last in its current form, and some Gradle task dependencies aren't quite right because I haven't gotten rid of the dependency on `core` (which should now be an easy patch since new consumer groups are in a much better state). * This patch currently depends on some Confluent trunk code because I prototyped with our Avro serializers w/ schema-registry support. We need to figure out what we want to provide as an example built-in set of serializers. Unlike core Kafka where we could ignore the issue, providing only ByteArray or String serializers, this is pretty central to how Copycat works. * This patch uses a hacked up version of Avro as its runtime data format. Not sure if we want to go through the entire API discussion just to get some basic code committed, so I filed KAFKA-2367 to handle that separately. The core connector APIs and the runtime data APIs are entirely orthogonal. * This patch needs some updates to get aligned with recent new consumer changes (specifically, I'm aware of the ConcurrentModificationException issue on exit). More generally, the new consumer is in flux but Copycat depends on it, so there are likely to be some negative interactions. * The layout feels a bit awkward to me right now because I ported it from a Maven layout. We don't have nearly the same level of granularity in Kafka currently (core and clients, plus the mostly ignored examples, log4j-appender, and a couple of contribs). We might want to reorganize, although keeping data+api separate from runtime and connector plugins is useful for minimizing dependencies. * There are a variety of other things (e.g., I'm not happy with the exception hierarchy/how they are currently handled, TopicPartition doesn't really need to be duplicated unless we want Copycat entirely isolated from the Kafka APIs, etc), but I expect those we'll cover in the review. Before commenting on the patch, it's probably worth reviewing https://issues.apache.org/jira/browse/KAFKA-2365 and https://issues.apache.org/jira/browse/KAFKA-2366 to get an idea of what I had in mind for a) what we ultimately want with all the Copycat patches and b) what we aim to cover in this initial patch. My hope is that we can use a WIP patch (after the current obvious deficiencies are addressed) while recognizing that we want to make iterative progress with a bunch of subsequent PRs. Author: Ewen Cheslack-Postava <me@ewencp.org> Reviewers: Ismael Juma, Gwen Shapira Closes #99 from ewencp/copycat and squashes the following commits: a3a47a6 [Ewen Cheslack-Postava] Simplify Copycat exceptions, make them a subclass of KafkaException. 8c108b0 [Ewen Cheslack-Postava] Rename Coordinator to Herder to avoid confusion with the consumer coordinator. 7bf8075 [Ewen Cheslack-Postava] Make Copycat CLI speific to standalone mode, clean up some config and get rid of config storage in standalone mode. 656a003 [Ewen Cheslack-Postava] Clarify and expand the explanation of the Copycat Coordinator interface. c0e5fdc [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat 0fa7a36 [Ewen Cheslack-Postava] Mark Copycat classes as unstable and reduce visibility of some classes where possible. d55d31e [Ewen Cheslack-Postava] Reorganize Copycat code to put it all under one top-level directory. b29cb2c [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat d713a21 [Ewen Cheslack-Postava] Address Gwen's review comments. 6787a85 [Ewen Cheslack-Postava] Make Converter generic to match serializers since some serialization formats do not require a base class of Object; update many other classes to have generic key and value class type parameters to match this change. b194c73 [Ewen Cheslack-Postava] Split Copycat converter option into two options for key and value. 0b5a1a0 [Ewen Cheslack-Postava] Normalize naming to use partition for both source and Kafka, adjusting naming in CopycatRecord classes to clearly differentiate. e345142 [Ewen Cheslack-Postava] Remove Copycat reflection utils, use existing Utils and ConfigDef functionality from clients package. be5c387 [Ewen Cheslack-Postava] Minor cleanup 122423e [Ewen Cheslack-Postava] Style cleanup 6ba87de [Ewen Cheslack-Postava] Remove most of the Avro-based mock runtime data API, only preserving enough schema functionality to support basic primitive types for an initial patch. 4674d13 [Ewen Cheslack-Postava] Address review comments, clean up some code styling. 25b5739 [Ewen Cheslack-Postava] Fix sink task offset commit concurrency issue by moving it to the worker thread and waking up the consumer to ensure it exits promptly. 0aefe21 [Ewen Cheslack-Postava] Add log4j settings for Copycat. 220e42d [Ewen Cheslack-Postava] Replace Avro serializer with JSON serializer. 1243a7c [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat 5a618c6 [Ewen Cheslack-Postava] Remove offset serializers, instead reusing the existing serializers and removing schema projection support. e849e10 [Ewen Cheslack-Postava] Remove duplicated TopicPartition implementation. dec1379 [Ewen Cheslack-Postava] Switch to using new consumer coordinator instead of manually assigning partitions. Remove dependency of copycat-runtime on core. 4a9b4f3 [Ewen Cheslack-Postava] Add some helpful Copycat-specific build and test targets that cover all Copycat packages. 31cd1ca [Ewen Cheslack-Postava] Add CLI tools for Copycat. e14942c [Ewen Cheslack-Postava] Add Copycat file connector. 0233456 [Ewen Cheslack-Postava] Add copycat-avro and copycat-runtime 11981d2 [Ewen Cheslack-Postava] Add copycat-data and copycat-api
9 years ago
do
CLASSPATH=$CLASSPATH:$file
done
if [ -d "$base_dir/connect/${cc_pkg}/build/dependant-libs" ] ; then
CLASSPATH=$CLASSPATH:$base_dir/connect/${cc_pkg}/build/dependant-libs/*
fi
KAFKA-2366; Initial patch for Copycat This is an initial patch implementing the basics of Copycat for KIP-26. The intent here is to start a review of the key pieces of the core API and get a reasonably functional, baseline, non-distributed implementation of Copycat in place to get things rolling. The current patch has a number of known issues that need to be addressed before a final version: * Some build-related issues. Specifically, requires some locally-installed dependencies (see below), ignores checkstyle for the runtime data library because it's lifted from Avro currently and likely won't last in its current form, and some Gradle task dependencies aren't quite right because I haven't gotten rid of the dependency on `core` (which should now be an easy patch since new consumer groups are in a much better state). * This patch currently depends on some Confluent trunk code because I prototyped with our Avro serializers w/ schema-registry support. We need to figure out what we want to provide as an example built-in set of serializers. Unlike core Kafka where we could ignore the issue, providing only ByteArray or String serializers, this is pretty central to how Copycat works. * This patch uses a hacked up version of Avro as its runtime data format. Not sure if we want to go through the entire API discussion just to get some basic code committed, so I filed KAFKA-2367 to handle that separately. The core connector APIs and the runtime data APIs are entirely orthogonal. * This patch needs some updates to get aligned with recent new consumer changes (specifically, I'm aware of the ConcurrentModificationException issue on exit). More generally, the new consumer is in flux but Copycat depends on it, so there are likely to be some negative interactions. * The layout feels a bit awkward to me right now because I ported it from a Maven layout. We don't have nearly the same level of granularity in Kafka currently (core and clients, plus the mostly ignored examples, log4j-appender, and a couple of contribs). We might want to reorganize, although keeping data+api separate from runtime and connector plugins is useful for minimizing dependencies. * There are a variety of other things (e.g., I'm not happy with the exception hierarchy/how they are currently handled, TopicPartition doesn't really need to be duplicated unless we want Copycat entirely isolated from the Kafka APIs, etc), but I expect those we'll cover in the review. Before commenting on the patch, it's probably worth reviewing https://issues.apache.org/jira/browse/KAFKA-2365 and https://issues.apache.org/jira/browse/KAFKA-2366 to get an idea of what I had in mind for a) what we ultimately want with all the Copycat patches and b) what we aim to cover in this initial patch. My hope is that we can use a WIP patch (after the current obvious deficiencies are addressed) while recognizing that we want to make iterative progress with a bunch of subsequent PRs. Author: Ewen Cheslack-Postava <me@ewencp.org> Reviewers: Ismael Juma, Gwen Shapira Closes #99 from ewencp/copycat and squashes the following commits: a3a47a6 [Ewen Cheslack-Postava] Simplify Copycat exceptions, make them a subclass of KafkaException. 8c108b0 [Ewen Cheslack-Postava] Rename Coordinator to Herder to avoid confusion with the consumer coordinator. 7bf8075 [Ewen Cheslack-Postava] Make Copycat CLI speific to standalone mode, clean up some config and get rid of config storage in standalone mode. 656a003 [Ewen Cheslack-Postava] Clarify and expand the explanation of the Copycat Coordinator interface. c0e5fdc [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat 0fa7a36 [Ewen Cheslack-Postava] Mark Copycat classes as unstable and reduce visibility of some classes where possible. d55d31e [Ewen Cheslack-Postava] Reorganize Copycat code to put it all under one top-level directory. b29cb2c [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat d713a21 [Ewen Cheslack-Postava] Address Gwen's review comments. 6787a85 [Ewen Cheslack-Postava] Make Converter generic to match serializers since some serialization formats do not require a base class of Object; update many other classes to have generic key and value class type parameters to match this change. b194c73 [Ewen Cheslack-Postava] Split Copycat converter option into two options for key and value. 0b5a1a0 [Ewen Cheslack-Postava] Normalize naming to use partition for both source and Kafka, adjusting naming in CopycatRecord classes to clearly differentiate. e345142 [Ewen Cheslack-Postava] Remove Copycat reflection utils, use existing Utils and ConfigDef functionality from clients package. be5c387 [Ewen Cheslack-Postava] Minor cleanup 122423e [Ewen Cheslack-Postava] Style cleanup 6ba87de [Ewen Cheslack-Postava] Remove most of the Avro-based mock runtime data API, only preserving enough schema functionality to support basic primitive types for an initial patch. 4674d13 [Ewen Cheslack-Postava] Address review comments, clean up some code styling. 25b5739 [Ewen Cheslack-Postava] Fix sink task offset commit concurrency issue by moving it to the worker thread and waking up the consumer to ensure it exits promptly. 0aefe21 [Ewen Cheslack-Postava] Add log4j settings for Copycat. 220e42d [Ewen Cheslack-Postava] Replace Avro serializer with JSON serializer. 1243a7c [Ewen Cheslack-Postava] Merge remote-tracking branch 'origin/trunk' into copycat 5a618c6 [Ewen Cheslack-Postava] Remove offset serializers, instead reusing the existing serializers and removing schema projection support. e849e10 [Ewen Cheslack-Postava] Remove duplicated TopicPartition implementation. dec1379 [Ewen Cheslack-Postava] Switch to using new consumer coordinator instead of manually assigning partitions. Remove dependency of copycat-runtime on core. 4a9b4f3 [Ewen Cheslack-Postava] Add some helpful Copycat-specific build and test targets that cover all Copycat packages. 31cd1ca [Ewen Cheslack-Postava] Add CLI tools for Copycat. e14942c [Ewen Cheslack-Postava] Add Copycat file connector. 0233456 [Ewen Cheslack-Postava] Add copycat-avro and copycat-runtime 11981d2 [Ewen Cheslack-Postava] Add copycat-data and copycat-api
9 years ago
done
# classpath addition for release
CLASSPATH=$CLASSPATH:$base_dir/libs/*
for file in $base_dir/core/build/libs/kafka_${SCALA_BINARY_VERSION}*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
shopt -u nullglob
# JMX settings
if [ -z "$KAFKA_JMX_OPTS" ]; then
KAFKA_JMX_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false "
fi
# JMX port to use
if [ $JMX_PORT ]; then
KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.port=$JMX_PORT "
fi
# Log directory to use
if [ "x$LOG_DIR" = "x" ]; then
LOG_DIR="$base_dir/logs"
fi
# Log4j settings
if [ -z "$KAFKA_LOG4J_OPTS" ]; then
# Log to console. This is a tool.
KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/config/tools-log4j.properties"
else
# create logs directory
if [ ! -d "$LOG_DIR" ]; then
mkdir -p "$LOG_DIR"
fi
fi
KAFKA_LOG4J_OPTS="-Dkafka.logs.dir=$LOG_DIR $KAFKA_LOG4J_OPTS"
# Generic jvm settings you want to add
if [ -z "$KAFKA_OPTS" ]; then
KAFKA_OPTS=""
fi
# Set Debug options if enabled
if [ "x$KAFKA_DEBUG" != "x" ]; then
# Use default ports
DEFAULT_JAVA_DEBUG_PORT="5005"
if [ -z "$JAVA_DEBUG_PORT" ]; then
JAVA_DEBUG_PORT="$DEFAULT_JAVA_DEBUG_PORT"
fi
# Use the defaults if JAVA_DEBUG_OPTS was not set
DEFAULT_JAVA_DEBUG_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=${DEBUG_SUSPEND_FLAG:-n},address=$JAVA_DEBUG_PORT"
if [ -z "$JAVA_DEBUG_OPTS" ]; then
JAVA_DEBUG_OPTS="$DEFAULT_JAVA_DEBUG_OPTS"
fi
echo "Enabling Java debug options: $JAVA_DEBUG_OPTS"
KAFKA_OPTS="$JAVA_DEBUG_OPTS $KAFKA_OPTS"
fi
# Which java to use
if [ -z "$JAVA_HOME" ]; then
JAVA="java"
else
JAVA="$JAVA_HOME/bin/java"
fi
# Memory options
if [ -z "$KAFKA_HEAP_OPTS" ]; then
KAFKA_HEAP_OPTS="-Xmx256M"
fi
# JVM performance options
if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true"
fi
while [ $# -gt 0 ]; do
COMMAND=$1
case $COMMAND in
-name)
DAEMON_NAME=$2
CONSOLE_OUTPUT_FILE=$LOG_DIR/$DAEMON_NAME.out
shift 2
;;
-loggc)
if [ -z "$KAFKA_GC_LOG_OPTS" ]; then
GC_LOG_ENABLED="true"
fi
shift
;;
-daemon)
DAEMON_MODE="true"
shift
;;
*)
break
;;
esac
done
# GC options
GC_FILE_SUFFIX='-gc.log'
GC_LOG_FILE_NAME=''
if [ "x$GC_LOG_ENABLED" = "xtrue" ]; then
GC_LOG_FILE_NAME=$DAEMON_NAME$GC_FILE_SUFFIX
KAFKA_GC_LOG_OPTS="-Xloggc:$LOG_DIR/$GC_LOG_FILE_NAME -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps "
fi
# Launch mode
if [ "x$DAEMON_MODE" = "xtrue" ]; then
nohup $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@" > "$CONSOLE_OUTPUT_FILE" 2>&1 < /dev/null &
else
exec $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@"
fi