#!/bin/bash
#
# Refresh the DeepSpeed environment on the VMs of the Azure cluster.
#
# Reads azure_config.json from the current directory (keys: location,
# ssh_private_key, num_vms), looks up the public IP addresses of the VMs in
# resource group deepspeed_rg_<location>, and on each VM:
#   - pulls the deepspeed/deepspeed:latest docker image,
#   - links workdir/DeepSpeed/azure/attach.sh into the login directory,
#   - updates the workdir/DeepSpeed checkout (including submodules),
#   - runs azure/start_container.sh.
#
# When pdsh is installed all nodes are updated in parallel; otherwise they are
# updated one at a time over plain ssh.
#
# Requires: jq, az (Azure CLI), ssh; optionally pdsh.

azure_config=azure_config.json
username=deepspeed

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the value of top-level key $1 from the config file.
# jq prints the literal string "null" when the key is absent.
config_value() {
  jq -r --arg key "$1" '.[$key]' "$azure_config"
}

if [[ ! -f "$azure_config" ]]; then
  die "Cannot find $azure_config"
fi

# An empty value (e.g. jq missing or failing) is treated like a missing key.
location=$(config_value location)
if [[ -z "$location" || "$location" == "null" ]]; then
  die 'missing location in config'
fi
rg="deepspeed_rg_${location}"

ssh_key=$(config_value ssh_private_key)
if [[ -z "$ssh_key" || "$ssh_key" == "null" ]]; then
  die 'missing ssh_private_key in config'
fi

num_vms=$(config_value num_vms)
if [[ -z "$num_vms" || "$num_vms" == "null" ]]; then
  die 'missing num_vms in config'
fi
if [[ ! "$num_vms" =~ ^[1-9][0-9]*$ ]]; then
  die "num_vms must be a positive integer, got '${num_vms}'"
fi

parallel=true
if ! command -v pdsh >/dev/null; then
  echo "Installing pdsh will allow for the docker pull to be done in parallel across the cluster. See: 'apt-get install pdsh'"
  parallel=false
fi

# Host key checking is disabled and known_hosts is not written.
ssh_opts=(
  -i "$ssh_key"
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
)

# Commands executed on every node, joined into a single "a; b; c; " line so
# the remote shell receives one command string regardless of transport.
remote_steps=(
  'docker pull deepspeed/deepspeed:latest'
  'ln -s workdir/DeepSpeed/azure/attach.sh attach.sh'
  'cd workdir/DeepSpeed'
  'git pull'
  'git submodule update --init --recursive'
  'bash azure/start_container.sh'
)
printf -v update_script '%s; ' "${remote_steps[@]}"

# Query Azure once and extract the public IP of the first num_vms entries;
# entries without a public IP are dropped rather than becoming host "null".
vm_ips_json=$(az vm list-ip-addresses -g "$rg") \
  || die "failed to list IP addresses for resource group ${rg}"

addrs=()
while IFS= read -r ip; do
  if [[ -n "$ip" ]]; then
    addrs+=("$ip")
  fi
done < <(
  jq -r --arg n "$num_vms" \
    '.[:($n | tonumber)][]
     | .virtualMachine.network.publicIpAddresses[0].ipAddress // empty' \
    <<<"$vm_ips_json"
)

if (( ${#addrs[@]} == 0 )); then
  die "no public IP addresses found in resource group ${rg}"
fi
if (( ${#addrs[@]} < num_vms )); then
  echo "warning: found ${#addrs[@]} public IP address(es) but num_vms is ${num_vms}" >&2
fi

if [[ "$parallel" == true ]]; then
  echo "parallel docker pull"
  # pdsh expects a comma-separated host list (no trailing comma).
  hosts=$(IFS=,; printf '%s' "${addrs[*]}")
  # PDSH_SSH_ARGS_APPEND is a plain string, so the options are space-joined.
  PDSH_RCMD_TYPE=ssh PDSH_SSH_ARGS_APPEND="${ssh_opts[*]}" \
    pdsh -w "$hosts" -l "$username" "$update_script"
else
  echo "sequential docker pull"
  for ip_addr in "${addrs[@]}"; do
    ssh "${ssh_opts[@]}" "${username}@${ip_addr}" "$update_script"
  done
fi