Files
nocodb/scripts/release/rollout_util.sh
2023-11-10 13:39:22 +05:30

157 lines
7.2 KiB
Bash
Executable File

#!/bin/bash
# caution: This script is for production
#
# forces ecs to relaunch the tasks/instances
# when relaunched a new docker image will be pulled
# resulting in rolling out a software/config or just
# restart.
#
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# sends message to slack channel nocohub-deploy
function message(){
echo $@
curl -s -X POST -H "Content-type: application/json" --data "{\"text\":\"${@}\"}" https://hooks.slack.com/services/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
}
function log_and_exit(){
echo $@
message "${ENVIRONMENT}: deployment failed. Check logs for details. ${@}"
exit 1
}
# expects ALL_SVS and CLUSTER variable to be set to check status
function update_workspace(){
if [[ ! "${CLUSTER}" || ! "${ALL_SVS}" ]]; then echo "CLUSTER and ALL_SVS variables must be set for update_workspace"; log_and_exit ; fi
for SVC in ${ALL_SVS}
do
if [[ ${EXCLUDED_SVC} =~ " ${SVC} " ]]
then
echo "skip updating service : ${SVC}"
else
DEPLOY_OUT=$(aws ecs update-service --cluster ${CLUSTER} --service ${SVC} --force-new-deployment --region=us-east-2 )
echo "updated service : ${SVC}"
fi
done
}
# expects ALL_SVS and CLUSTER variable to be set to check status
function check_status_all_workspaces(){
# start after initial sleep to avoid race
echo "Adding initial delay of 30 sec to check status "
sleep 30;
if [[ ! "${CLUSTER}" || ! "${ALL_SVS}" ]]; then echo "CLUSTER and ALL_SVS variables must be set for check status"; log_and_exit ; fi
for SVC in ${ALL_SVS}
do
if [[ ${EXCLUDED_SVC} =~ " ${SVC} " ]]
then
echo "skip status check for service : ${SVC}"
else
checkStatus "${SVC}"
fi
done
}
function checkStatus(){
# check if all deployments in the service is set to COMPLETED
local service=${1}
if [[ ! "${CLUSTER}" ]]; then echo "CLUSTER and service variable must be set for check status"; log_and_exit ; fi
local STATUS=$(aws ecs describe-services --cluster ${CLUSTER} --service ${service} --region us-east-2 | jq .services[].deployments[].rolloutState -r)
echo "First Check: ECS deployment status: ${STATUS} for ${service}.Retry count: ${global_retry_count}"
while [[ ${global_retry_count} -lt 20 && "${STATUS}" == *"IN_PROGRESS"* ]]
do
STATUS=$(aws ecs describe-services --cluster ${CLUSTER} --service ${service} --region us-east-2 | jq .services[].deployments[].rolloutState -r)
global_retry_count=$((global_retry_count+1))
echo "ECS deployment status: ${STATUS} for ${service}. Retry after 30 seconds. Retry count: ${global_retry_count}"
if [[ "${STATUS}" == *"IN_PROGRESS"* ]]; then
sleep 30;
fi
done
if [[ "${STATUS}" == *"IN_PROGRESS"* ]]
then
message "${ENVIRONMENT}: deployment status is IN_PROGRESS after waiting for about 10 mins for workspace ${service}. check status at https://us-east-2.console.aws.amazon.com/ecs/v2/clusters/${CLUSTER}/services/${service}/tasks?region=us-east-2 "
else
message "${ENVIRONMENT}: deployment completed successfully for workspace : ${service}"
fi
}
#
# checks the current count of instances
# updates it to double
# runs the check on instances with wait time of 10 seconds
function prewarm_asg(){
if [[ ! "${ASG_NAME}" ]]; then echo "ASG_NAME variables must be set for pre-warming"; log_and_exit ; fi
# Get the current desired count
prev_count=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${ASG_NAME} --query 'AutoScalingGroups[0].DesiredCapacity' --output text)
# Double the current count
new_count=$((prev_count * 2))
message "${ENVIRONMENT}: prewarming initiating. previous_count: ${prev_count} new_count: ${new_count} "
# Update the desired count to be double
aws autoscaling set-desired-capacity --auto-scaling-group-name ${ASG_NAME} --desired-capacity $new_count
# Wait for the new instances to launch with doubled count
timeout=10
while [[ $timeout -gt 0 ]]; do
current_count=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${ASG_NAME} --query 'AutoScalingGroups[0].Instances[?LifecycleState==`InService`].InstanceId' --output text | wc -w)
if [[ $current_count -eq $new_count ]]; then
break
fi
sleep 1
((timeout--))
done
message "${ENVIRONMENT}: prewarming completed successfully. previous_count: ${prev_count} new_count: ${new_count} "
}
# calls api to perform pause and exit the worker pods. the worker pods will pause and wait till any inflight jobs which are in progress
# once the instance exits, ecs takes care of adding new instance with new image
function pause_workers_and_gracefully_shutdown(){
echo "pause_workers_and_gracefully_shutdown: Expected varibles to be set CLUSTER=${CLUSTER} WORKERS_SERVICE_NAME=${WORKERS_SERVICE_NAME} HOST_NAME=${HOST_NAME} API_CREDENTIALS=$([[ ! -z "$API_CREDENTIALS" ]] && echo "***value-set***" || echo "Empty")"
if [[ ! "${CLUSTER}" || ! "${WORKERS_SERVICE_NAME}" || ! "${HOST_NAME}" || ! "${API_CREDENTIALS}" ]]; then log_and_exit "CLUSTER=${CLUSTER} WORKERS_SERVICE_NAME=${WORKERS_SERVICE_NAME} HOST_NAME=${HOST_NAME} API_CREDENTIALS variables must be set" ; fi
# 0. fetch capacity details
# Replace these placeholders with your values
CURRENT_DESIRED_COUNT=$(aws ecs describe-services --cluster $CLUSTER --services $WORKERS_SERVICE_NAME --query 'services[0].desiredCount' --output text)
NEW_DESIRED_COUNT=$((CURRENT_DESIRED_COUNT + 1 ))
HOST_NAME=${HOST_NAME:-https://staging.noco.to}
# 1. trigger pause and exit
curl -u ${API_CREDENTIALS} ${HOST_NAME}/internal/workers/pause-and-exit -XPOST || exit 1
# 2. update desired to double
# Update the service with the new desired count
# aws ecs update-service --cluster $CLUSTER --service $WORKERS_SERVICE_NAME --desired-count $NEW_DESIRED_COUNT
message "${ENVIRONMENT}: workers pod rollout triggered"
}
function perform_rollout(){
PROMOTE_IMAGE_BEFORE_ROLLOUT=${1:-false}
if [[ ! "${ENVIRONMENT}" || ! "${CLUSTER}" ]]; then echo "CLUSTER and ENVIRONMENT variables must be set for check status"; log_and_exit ; fi
global_retry_count=0
message "${ENVIRONMENT}: deployment started."
if [[ "${PROMOTE_IMAGE_BEFORE_ROLLOUT}" == "true" && "${ENVIRONMENT}" == "Production" ]]
then
message "${ENVIRONMENT}: promoting ws-pre-release to ws before rollout."
${SCRIPT_DIR}/image_promote.sh "${ECR_REPO_NAME}" "${PRE_REL_STAGE_TAG}" "${STAGE_TAG}"
fi
latest_remote_digest=$(aws ecr batch-get-image --region us-east-2 --repository-name ${REPO_NAME:-nocohub} --image-ids imageTag=${STAGE_TAG} --output text --query images[].imageId )
message "${ENVIRONMENT}: Image with tag:${STAGE_TAG} will be launched. digest: ${latest_remote_digest}"
# TODO: prewarm ASG to have additional instances. update only desired
ALL_SVS=$( aws ecs list-services --cluster ${CLUSTER} --region=us-east-2 | jq -r '.serviceArns[] | split("/") | .[2]')
# update_workspace
# check_status_all_workspaces
message "${ENVIRONMENT}: deployment script executed successfully."
}