Skip to content

Commit

Permalink
Check if all cluster members are healthy
Browse files Browse the repository at this point in the history
According to etcd-io/etcd#8070, `cluster-health` should already verify that
all cluster members are healthy, but this apparently broke in some recent
releases, so the script now checks it explicitly.
  • Loading branch information
discordianfish committed May 22, 2018
1 parent d5a66b3 commit 32d1935
Showing 1 changed file with 20 additions and 3 deletions.
23 changes: 20 additions & 3 deletions kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1051,17 +1051,34 @@ Resources:
- |
#!/bin/bash
# Startup gate: blocks until etcd answers and this host shows up in the
# member list.
# Strict mode: abort on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail
# NOTE(review): this text looks like a rendered diff without +/- markers. The
# next two lines and the unquoted `grep $(hostname)` line further down appear
# to be the three lines the commit deleted - confirm against kubernetes.yaml.
echo "Wait for cluster-health"
# Retry once per second until `cluster-health` exits successfully. Used as a
# loop condition, so a failing call does not trigger `set -e`.
while ! /etc/etcdctl-wrapper cluster-health; do sleep 1; done
echo "Wait for etcd to join cluster"
# Retry once per second until the member list contains this machine's hostname.
# The unquoted command substitution is subject to word splitting and globbing.
while ! /etc/etcdctl-wrapper member list | grep $(hostname); do sleep 1; done
# Same wait, with the hostname quoted so grep always receives a single pattern.
while ! /etc/etcdctl-wrapper member list | grep "$(hostname)"; do sleep 1; done
echo "Wait for cluster-health"
# Poll once per second until `cluster-health` succeeds, prints the expected
# number of lines, and every printed line contains 'is healthy'.
while true; do
  sleep 1
  # Run the pipeline as an `if` condition. As a bare command, a failing
  # etcdctl would abort the whole script (errexit + pipefail are enabled at
  # the top of the script) before its status could be inspected, so the retry
  # below was never reached. With pipefail the pipeline fails whenever etcdctl
  # fails, while tee still copies the output to the log and the scratch file.
  if ! /etc/etcdctl-wrapper cluster-health | tee /tmp/cluster-health.txt; then
    echo " - no quorum, retrying"
    continue
  fi
  # Expect ControllerPoolSize + 1 lines of output.
  # NOTE(review): presumably one line per member plus one summary line -
  # confirm against the etcdctl version in use.
  if [[ "$(wc -l < /tmp/cluster-health.txt)" -ne "$((${ControllerPoolSize}+1))" ]]; then
    echo "- unexpected number of peers"
    continue
  fi
  # `grep -v` prints every line that lacks 'is healthy' and succeeds only if
  # there is at least one; when it finds none, the cluster is healthy and the
  # loop ends.
  if ! grep -v "is healthy" /tmp/cluster-health.txt; then
    break
  fi
  echo " - unhealthy members found, retrying"
done
echo "Signaling success"
# Report completion for the ControllerAutoScalingGroup resource by running
# cfn-signal from the rochacon/cfn-bootstrap image on the host network.
# The trailing `|| true` keeps a failed signal from failing this script.
docker run --rm --net=host rochacon/cfn-bootstrap \
  cfn-signal --resource ControllerAutoScalingGroup --stack ${StackName} --region ${Region} \
  || true # Ignore if signaling failed
- StackName: !Ref AWS::StackName
Region: !Ref AWS::Region
ControllerPoolSize: !Ref ControllerPoolSize
# Environment files
etcdEnv:
Fn::Base64:
Expand Down

0 comments on commit 32d1935

Please sign in to comment.