#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Wrapper around "vagrant up" / "vagrant hostmanager" / "vagrant provision"
# that brings up a cluster either locally or, with --aws, on the aws provider
# in small batches.

set -o nounset
set -o errexit  # exit script if any command exits with nonzero value

readonly PROG_NAME=$(basename "$0")
readonly PROG_DIR=$(dirname "$(realpath "$0")")
readonly INVOKE_DIR=$(pwd)
readonly ARGS="$*"

# overrideable defaults
AWS=false
PARALLEL=true
MAX_PARALLEL=5
DEBUG=false

readonly USAGE="Usage: $PROG_NAME [-h | --help] [--aws [--no-parallel] [--max-parallel MAX]]"
# NOTE(review): the original help text was not recoverable from the source;
# the text below is rebuilt from the options handled by the parser further
# down. Confirm the wording against the upstream copy of this script.
readonly HELP="$(cat <<EOF
Tool to bring up a vagrant cluster on the local machine or on aws.

    -h | --help             Show this help message and exit
    --aws                   Bring the cluster up using the vagrant aws provider
    --no-parallel           Bring up all machines serially. Only applies with --aws
    --max-parallel MAX      Number of test worker machines brought up per batch.
                            Only applies with --aws. default: $MAX_PARALLEL
    --debug                 Pass --debug to vagrant up. Only applies with --aws
EOF
)"

# Print usage and help text to stdout, then exit successfully
function help {
    echo "$USAGE"
    echo "$HELP"
    exit 0
}

# Parse command line options into the overrideable defaults above
while (( $# > 0 )); do
    key="$1"
    case "$key" in
        -h | --help)
            help
            ;;
        --aws)
            AWS=true
            ;;
        --no-parallel)
            PARALLEL=false
            ;;
        --max-parallel)
            # Without this check a missing value aborts with an opaque
            # "unbound variable" error because of nounset
            if (( $# < 2 )); then
                echo "Option --max-parallel requires a value" >&2
                echo "$USAGE" >&2
                exit 1
            fi
            # MAX is later used as a modulus, so it must be a positive integer
            if ! [[ "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 )); then
                echo "Invalid value for --max-parallel: $2" >&2
                exit 1
            fi
            MAX_PARALLEL=$(( 10#$2 ))  # 10# avoids octal parsing of e.g. "08"
            shift
            ;;
        --debug)
            DEBUG=true
            ;;
        *)
            # unknown option
            echo "Unknown option $1" >&2
            exit 1
            ;;
    esac
    shift # past argument or value
done

# Get a list of vagrant machines (in any state)
# Outputs: space-separated machine names on stdout, parsed from "vagrant status"
function read_vagrant_machines {
    local ignore_state="ignore"
    local reading_state="reading"

    local state="$ignore_state"
    local machines=""
    local line

    while read -r line; do
        # Lines before the first empty line are ignored
        # The first empty line triggers change from ignore state to reading state
        # When in reading state, we parse in machine names until we hit the next empty line,
        # which signals that we're done parsing
        if [[ -z "$line" ]]; then
            if [[ "$state" == "$ignore_state" ]]; then
                state="$reading_state"
            else
                # all done
                echo "$machines"
                return
            fi
            continue
        fi

        # Parse machine name while in reading state
        if [[ "$state" == "$reading_state" ]]; then
            # The machine name is everything before the first space
            line="${line%% *}"
            if [[ -z "$machines" ]]; then
                machines="$line"
            else
                machines="${machines} ${line}"
            fi
        fi
    done < <(vagrant status)

    # Output ended without a terminating empty line: still report what was parsed
    echo "$machines"
}

# Filter "list", returning a list of strings containing pattern as a substring
# Arguments: $1 - whitespace-separated list, $2 - pattern (interpreted by grep)
# Outputs:   matching items on stdout, each preceded by a single space
function filter {
    local list="$1"
    local pattern="$2"

    local result=""
    local item
    for item in $list; do
        # "--" keeps a pattern starting with "-" from being read as a grep option
        if grep -q -- "$pattern" <<< "$item"; then
            result="$result $item"
        fi
    done
    echo "$result"
}

# Given a list of machine names, return only test worker machines
function worker {
    local machines="$1"
    local workers
    workers=$(filter "$machines" "worker")
    workers=$(echo "$workers" | xargs)  # trim leading/trailing whitespace
    echo "$workers"
}

# Given a list of machine names, return only zookeeper and broker machines
function zk_broker {
    local machines="$1"
    local zk_broker_list
    zk_broker_list=$(filter "$machines" "zk")
    zk_broker_list="$zk_broker_list $(filter "$machines" "broker")"
    zk_broker_list=$(echo "$zk_broker_list" | xargs)  # trim leading/trailing whitespace
    echo "$zk_broker_list"
}

# Run a vagrant command on batches of machines of size $group_size
# This is annoying but necessary on aws to avoid errors due to AWS request rate
# throttling
#
# Arguments: $1 - command line to run (word-split before execution)
#            $2 - whitespace-separated machine names
#            $3 - batch size, a positive integer
# Returns:   non-zero as soon as a batch fails or if the batch size is invalid
#
# Example
# $ vagrant_batch_command "vagrant up" "m1 m2 m3 m4 m5" "2"
#
# This is equivalent to running "vagrant up" on groups of machines of size 2 or less, i.e.:
# $ vagrant up m1 m2
# $ vagrant up m3 m4
# $ vagrant up m5
function vagrant_batch_command {
    local vagrant_cmd="$1"
    local machines="$2"
    local group_size="$3"

    if ! [[ "$group_size" =~ ^[0-9]+$ ]] || (( 10#$group_size < 1 )); then
        echo "Invalid batch size: $group_size" >&2
        return 1
    fi
    group_size=$(( 10#$group_size ))

    local count=1
    local m_group=""
    local machine
    # Using --provision flag makes this command useable both when bringing up a cluster from scratch,
    # and when bringing up a halted cluster. Permissions on certain directories set during provisioning
    # seem to revert when machines are halted, so --provision ensures permissions are set correctly in all cases
    # NOTE(review): no caller in this file passes --provision; confirm whether this comment is still current
    for machine in $machines; do
        m_group="$m_group $machine"

        if (( count % group_size == 0 )); then
            # We've reached a full group
            # Bring up this part of the cluster
            # Word splitting of both variables is intentional.
            # "|| return" stops at the first failing batch even when errexit
            # is suppressed by the caller's context.
            # shellcheck disable=SC2086
            $vagrant_cmd $m_group || return
            m_group=""
        fi
        count=$(( count + 1 ))
    done

    # Take care of any leftover partially complete group
    if [[ -n "$m_group" ]]; then
        # shellcheck disable=SC2086
        $vagrant_cmd $m_group || return
    fi
}

# Run the given command with TMPDIR pointing at a fresh private directory and
# remove that directory afterwards, whether or not the command succeeded
# Arguments: $@ - command (or shell function) and its arguments
# Returns:   exit status of the command
function run_with_private_tmpdir {
    local tmp_dir
    tmp_dir=$(mktemp -d) || return
    local status=0
    TMPDIR="$tmp_dir" "$@" || status=$?
    rm -rf -- "$tmp_dir"
    return "$status"
}

# We assume vagrant-hostmanager is installed, but may or may not be disabled during vagrant up
# In this fashion, we ensure we run hostmanager after machines are up, and before provisioning.
# This sequence of commands is necessary for example for bringing up a multi-node zookeeper cluster
function bring_up_local {
    vagrant up --no-provision
    vagrant hostmanager
    vagrant provision
}

# Bring up the cluster on aws
# Arguments: $1 - "true" to bring up test workers in batches, anything else for fully serial
#            $2 - batch size for test worker machines
#            $3 - "true" to pass --debug to vagrant up
function bring_up_aws {
    local parallel="$1"
    local max_parallel="$2"
    local machines
    machines="$(read_vagrant_machines)"

    # Defaults to empty so an unexpected $3 cannot trip nounset later on
    local debug=""
    case "$3" in
        true)
            debug="--debug"
            ;;
        false)
            debug=""
            ;;
    esac

    local zk_broker_machines
    local worker_machines
    zk_broker_machines=$(zk_broker "$machines")
    worker_machines=$(worker "$machines")

    if [[ "$parallel" == "true" ]]; then
        if [[ -n "$zk_broker_machines" ]]; then
            # We still have to bring up zookeeper/broker nodes serially
            echo "Bringing up zookeeper/broker machines serially"
            # shellcheck disable=SC2086
            vagrant up --provider=aws --no-parallel --no-provision $zk_broker_machines $debug
            vagrant hostmanager --provider=aws
            vagrant provision
        fi

        if [[ -n "$worker_machines" ]]; then
            echo "Bringing up test worker machines in parallel"
            # Try to isolate this job in its own /tmp space. See note
            # below about vagrant issue
            run_with_private_tmpdir vagrant_batch_command \
                "vagrant up $debug --provider=aws" "$worker_machines" "$max_parallel"
            vagrant hostmanager --provider=aws
        fi
    else
        # shellcheck disable=SC2086
        vagrant up --provider=aws --no-parallel --no-provision $debug
        vagrant hostmanager --provider=aws
        vagrant provision
    fi

    # Currently it seems that the AWS provider will always run rsync
    # as part of vagrant up. However,
    # https://github.com/mitchellh/vagrant/issues/7531 means it is not
    # safe to do so. Since the bug doesn't seem to cause any direct
    # errors, just missing data on some nodes, follow up with serial
    # rsyncing to ensure we're in a clean state. Use custom TMPDIR
    # values to ensure we're isolated from any other instances of this
    # script that are running/ran recently and may cause different
    # instances to sync to the wrong nodes
    local worker_machine
    for worker_machine in $worker_machines; do
        run_with_private_tmpdir vagrant rsync "$worker_machine"
    done
}

# Entry point: dispatch on the parsed --aws option
function main {
    if [[ "$AWS" == "true" ]]; then
        bring_up_aws "$PARALLEL" "$MAX_PARALLEL" "$DEBUG"
    else
        bring_up_local
    fi
}

main