Browse Source
The main impediment to bringing up AWS machines in parallel using Vagrant was the interaction between `vagrant-hostmanager` and `vagrant-aws`. If you disable hostmanager during the `up` phase and run it after the cluster is up, parallel bring-up is possible. The only caveat is that machines must be brought up in small-ish batches to prevent rate-limit errors from AWS, since `vagrant-aws` doesn't seem to have mechanisms to handle this itself. This PR: - disables `vagrant-hostmanager` during bring-up - adds a wrapper script to make it convenient to bring machines up in batches on AWS. Author: Geoff Anderson <geoff@confluent.io> Reviewers: Ewen Cheslack-Postava <ewen@confluent.io> Closes #982 from granders/vagrant-disable-hostmanager (pull/982/merge)
5 changed files with 258 additions and 10 deletions
@ -0,0 +1,237 @@
@@ -0,0 +1,237 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more |
||||
# contributor license agreements. See the NOTICE file distributed with |
||||
# this work for additional information regarding copyright ownership. |
||||
# The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
# (the "License"); you may not use this file except in compliance with |
||||
# the License. You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
#!/usr/bin/env bash
# NOTE(review): a shebang is only honoured on the very first line of a file;
# here it follows the license header, so it acts as a plain comment.

set -o nounset
set -o errexit  # exit script if any command exits with nonzero value

# Name of this script as invoked (used in the usage line).
# Assignment is kept separate from `readonly` so that a failing command
# substitution is not masked by the always-successful `readonly` builtin.
PROG_NAME="$(basename "$0")"
readonly PROG_NAME

# Directory containing this script. `realpath` is not installed everywhere,
# so fall back to cd + pwd -P when it is missing.
if command -v realpath >/dev/null 2>&1; then
  PROG_DIR="$(dirname "$(realpath "$0")")"
else
  PROG_DIR="$(cd "$(dirname "$0")" && pwd -P)"
fi
readonly PROG_DIR

# Directory the script was invoked from.
INVOKE_DIR="$(pwd)"
readonly INVOKE_DIR

# All command line arguments joined into one space-separated string.
# "$*" makes the array-to-string flattening explicit.
ARGS="$*"
readonly ARGS
||||
|
||||
# Defaults; each can be overridden by a command line flag
# (--aws, --no-parallel, --max-parallel MAX).
AWS=false
PARALLEL=true
MAX_PARALLEL=5

# One-line usage summary. PROG_NAME is expected to be set earlier in the script.
USAGE="Usage: $PROG_NAME [-h | --help] [--aws [--no-parallel] [--max-parallel MAX]]"
readonly USAGE

# Long help text. The heredoc delimiter is unquoted on purpose so that
# $MAX_PARALLEL expands to the default value defined above.
# NOTE(review): column alignment/indentation of the option list may have been
# lost; the wording below is reproduced as-is.
HELP="$(cat <<EOF
Tool to bring up a vagrant cluster on local machine or aws.

-h | --help Show this help message
--aws Use if you are running in aws
--no-parallel Bring up machines not in parallel. Only applicable on aws
--max-parallel MAX Maximum number of machines to bring up in parallel. Note: only applicable on test worker machines on aws. default: $MAX_PARALLEL

Approximately speaking, this wrapper script essentially wraps 2 commands:
vagrant up
vagrant hostmanager

The situation on aws is complicated by the fact that aws imposes a maximum request rate,
which effectively caps the number of machines we are able to bring up in parallel. Therefore, on aws,
this wrapper script attempts to bring up machines in small batches.

If you are seeing rate limit exceeded errors, you may need to use a reduced --max-parallel setting.

EOF
)"
readonly HELP
||||
|
||||
# Print the usage line followed by the long help text, then exit successfully.
# Globals: USAGE (read), HELP (read)
# Note: this function terminates the script (exit 0); it never returns.
function help {
  printf '%s\n' "$USAGE"
  printf '%s\n' "$HELP"
  exit 0
}
||||
|
||||
# Parse command line flags into the globals AWS, PARALLEL and MAX_PARALLEL.
# Globals:   AWS, PARALLEL, MAX_PARALLEL (written); USAGE (read)
# Arguments: the script's command line arguments
# Exits:     0 via help for -h/--help; 1 on an unknown option or on a
#            missing/invalid --max-parallel value
function parse_args {
  local key
  # Arithmetic comparison; inside [[ ]] the > operator compares strings.
  while (( $# > 0 )); do
    key="$1"
    case "$key" in
      -h | --help)
        help
        ;;
      --aws)
        AWS=true
        ;;
      --no-parallel)
        PARALLEL=false
        ;;
      --max-parallel)
        # Without this check a missing value surfaces as an opaque
        # "unbound variable" error under `set -o nounset`.
        if (( $# < 2 )); then
          echo "Option --max-parallel requires a value" >&2
          echo "$USAGE" >&2
          exit 1
        fi
        # The value is later used as a batch size (modulus), so it must be a
        # positive integer; leading zeros are rejected to avoid octal parsing.
        if ! [[ "$2" =~ ^[1-9][0-9]*$ ]]; then
          echo "Invalid value for --max-parallel: '$2' (expected a positive integer)" >&2
          echo "$USAGE" >&2
          exit 1
        fi
        MAX_PARALLEL="$2"
        shift  # past the value; the shift below consumes the flag itself
        ;;
      *)
        # unknown option
        echo "Unknown option $1" >&2
        echo "$USAGE" >&2
        exit 1
        ;;
    esac
    shift  # past argument or value
  done
}

parse_args "$@"
||||
|
||||
# Print a space-separated list of vagrant machine names (in any state),
# parsed from the output of `vagrant status`.
#
# Parsing rules:
#   - lines before the first empty line are ignored
#   - the first empty line switches the parser into "reading" mode
#   - while reading, the first space-delimited field of each line is taken
#     as a machine name
#   - the next empty line (or end of output) ends the listing
#
# Outputs: the machine names on one line on stdout (an empty line if none)
function read_vagrant_machines {
  local reading=false
  local machines=""
  local line

  # `read` without IFS= strips surrounding whitespace, so whitespace-only
  # lines count as empty. The `|| [[ -n ... ]]` keeps a final line that has
  # no trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" ]]; then
      if [[ "$reading" == "true" ]]; then
        # second empty line: all done
        break
      fi
      reading=true
      continue
    fi

    if [[ "$reading" == "true" ]]; then
      # ${line%% *} keeps everything before the first space (the name column)
      machines="${machines:+$machines }${line%% *}"
    fi
  done < <(vagrant status)

  # Emitted after the loop so names are still reported when the output ends
  # without a closing empty line.
  echo "$machines"
}
||||
|
||||
# Filter "list", returning the items that contain "pattern" as a substring.
#
# Arguments: $1 - whitespace-separated list of items
#            $2 - substring to look for (matched literally, not as a regex)
# Outputs:   the matching items on one line; each item is preceded by a single
#            space (so a non-empty result starts with a space, which callers
#            are expected to trim)
function filter {
  local list="$1"
  local pattern="$2"

  local item  # keep the loop variable out of the global scope
  local result=""
  # $list is deliberately unquoted: word splitting yields the individual items
  for item in $list; do
    # Quoting $pattern inside the glob makes it a literal substring match
    if [[ "$item" == *"$pattern"* ]]; then
      result="$result $item"
    fi
  done
  echo "$result"
}
||||
|
||||
# Given a list of machine names, return only test worker machines,
# i.e. the names containing "worker".
#
# Arguments: $1 - whitespace-separated list of machine names
# Outputs:   matching names on one line, separated by single spaces
function worker {
  local machines="$1"
  # Declaration and assignment are separate so the status of the command
  # substitution is not hidden behind `local`.
  local workers
  workers="$(filter "$machines" "worker")"
  # xargs with no command re-emits its input words joined by single spaces,
  # which trims leading/trailing whitespace
  workers="$(echo "$workers" | xargs)"
  echo "$workers"
}
||||
|
||||
# Given a list of machine names, return only zookeeper and broker machines,
# i.e. the names containing "zk" followed by the names containing "broker".
#
# Arguments: $1 - whitespace-separated list of machine names
# Outputs:   matching names on one line, separated by single spaces
function zk_broker {
  local machines="$1"
  # Declaration and assignment are separate so the status of the command
  # substitution is not hidden behind `local`.
  local zk_broker_list
  zk_broker_list="$(filter "$machines" "zk")"
  zk_broker_list="$zk_broker_list $(filter "$machines" "broker")"
  # xargs with no command re-emits its input words joined by single spaces,
  # which trims leading/trailing whitespace
  zk_broker_list="$(echo "$zk_broker_list" | xargs)"
  echo "$zk_broker_list"
}
||||
|
||||
# Run a vagrant command on batches of machines of size $group_size
# This is annoying but necessary on aws to avoid errors due to AWS request rate
# throttling
#
# Arguments: $1 - command to run, including any flags (e.g. "vagrant up")
#            $2 - whitespace-separated list of machine names
#            $3 - maximum number of machines per invocation (positive integer)
# Returns:   1 (with a message on stderr) if the group size is not a positive
#            integer
#
# Example
#   $ vagrant_batch_command "vagrant up" "m1 m2 m3 m4 m5" "2"
#
# This is equivalent to running "vagrant up" on groups of machines of size 2 or less, i.e.:
#   $ vagrant up m1 m2
#   $ vagrant up m3 m4
#   $ vagrant up m5
function vagrant_batch_command {
  local vagrant_cmd="$1"
  local machines="$2"
  local group_size="$3"

  # A zero or non-numeric size would break the modulus below and silently put
  # every machine into a single batch; leading zeros are rejected so the value
  # is never read as octal.
  if ! [[ "$group_size" =~ ^[1-9][0-9]*$ ]]; then
    echo "vagrant_batch_command: group size must be a positive integer, got '$group_size'" >&2
    return 1
  fi

  local machine
  local count=0
  local m_group=""
  # $machines is deliberately unquoted: word splitting yields the machine names
  for machine in $machines; do
    m_group="$m_group $machine"
    count=$((count + 1))

    if (( count % group_size == 0 )); then
      # We've reached a full group
      # Run the command on this part of the cluster.
      # Both expansions are deliberately unquoted so the command string and
      # the machine list split into separate words.
      $vagrant_cmd $m_group
      m_group=""
    fi
  done

  # Take care of any leftover partially complete group
  if [[ -n "$m_group" ]]; then
    $vagrant_cmd $m_group
  fi
}
||||
|
||||
# Bring up the cluster on the local machine.
#
# We assume vagrant-hostmanager is installed, but it may or may not be disabled
# during vagrant up. Running the three steps separately ensures hostmanager
# runs after the machines are up and before provisioning.
# This sequence of commands is necessary, for example, for bringing up a
# multi-node zookeeper cluster.
function bring_up_local {
  vagrant up --no-provision
  vagrant hostmanager
  vagrant provision
}
||||
|
||||
# Bring up the cluster on aws.
#
# Arguments: $1 - "true" to bring test worker machines up in batches;
#                 any other value brings everything up serially
#            $2 - maximum number of worker machines per batch
# Outputs:   progress messages on stdout, plus whatever vagrant prints
function bring_up_aws {
  local parallel="$1"
  local max_parallel="$2"
  # All three are declared local (and separately from their assignment) so
  # they do not leak into the global scope and so the status of each command
  # substitution is not hidden behind `local`.
  local machines
  local zk_broker_machines
  local worker_machines

  machines="$(read_vagrant_machines)"
  zk_broker_machines="$(zk_broker "$machines")"
  worker_machines="$(worker "$machines")"

  if [[ "$parallel" == "true" ]]; then
    if [[ -n "$zk_broker_machines" ]]; then
      # We still have to bring up zookeeper/broker nodes serially
      echo "Bringing up zookeeper/broker machines serially"
      # $zk_broker_machines is deliberately unquoted so each machine name is
      # passed as its own argument
      vagrant up --provider=aws --no-parallel --no-provision $zk_broker_machines
      vagrant hostmanager
      vagrant provision
    fi

    if [[ -n "$worker_machines" ]]; then
      echo "Bringing up test worker machines in parallel"
      vagrant_batch_command "vagrant up --provider=aws" "$worker_machines" "$max_parallel"
      vagrant hostmanager
    fi
  else
    # Serial path: no machine names are passed to vagrant here
    vagrant up --provider=aws --no-parallel --no-provision
    vagrant hostmanager
    vagrant provision
  fi
}
||||
|
||||
# Entry point: dispatch to the aws or the local bring-up routine.
# Globals: AWS (read), PARALLEL (read), MAX_PARALLEL (read)
function main {
  if [[ "$AWS" == "true" ]]; then
    bring_up_aws "$PARALLEL" "$MAX_PARALLEL"
  else
    bring_up_local
  fi
}
||||
|
||||
# Run the script; command line flags have already been parsed into globals.
main
Loading…
Reference in new issue