#!/bin/bash
# setup_docker.sh
#
# Runs a fixed update sequence on every VM of the DeepSpeed Azure resource
# group: pull the deepspeed/deepspeed:latest docker image, link attach.sh into
# the remote home directory, update the workdir/DeepSpeed git checkout
# (including submodules) and run azure/start_container.sh.
#
# Configuration is read from azure_config.json in the current directory:
#   location         suffix of the resource group name (deepspeed_rg_<location>)
#   ssh_private_key  identity file handed to ssh via -i
#   num_vms          number of VMs to update (indices 0 .. num_vms-1 of the
#                    `az vm list-ip-addresses` output)
#
# Requires jq and the Azure CLI (az). When pdsh is installed all nodes are
# updated in parallel; otherwise they are updated one by one over ssh.

azure_config=azure_config.json
username=deepspeed

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the raw (unquoted) value of top-level key $1 from the config file.
# jq prints the literal string "null" when the key is absent.
config_value() {
  jq -r --arg key "$1" '.[$key]' "${azure_config}"
}

if [[ ! -f "${azure_config}" ]]; then
  die "Cannot find $azure_config"
fi

command -v jq >/dev/null || die "jq is required to parse ${azure_config}"
command -v az >/dev/null || die "the Azure CLI (az) is required to look up VM addresses"

location=$(config_value location)
if [[ -z "${location}" || "${location}" == "null" ]]; then
  die 'missing location in config'
fi
rg="deepspeed_rg_${location}"

ssh_key=$(config_value ssh_private_key)
if [[ -z "${ssh_key}" || "${ssh_key}" == "null" ]]; then
  die 'missing ssh_private_key in config'
fi

num_vms=$(config_value num_vms)
if [[ -z "${num_vms}" || "${num_vms}" == "null" ]]; then
  die 'missing num_vms in config'
fi
if [[ ! "${num_vms}" =~ ^[1-9][0-9]*$ ]]; then
  die "num_vms must be a positive integer, got '${num_vms}'"
fi

# pdsh is optional: without it the nodes are simply handled sequentially.
parallel=true
if ! command -v pdsh >/dev/null; then
  echo "Installing pdsh will allow for the docker pull to be done in parallel across the cluster. See: 'apt-get install pdsh'"
  parallel=false
fi

# Kept as an array so a key path containing spaces survives the plain ssh
# call. pdsh only accepts these options as one space-separated string.
ssh_args=(
  -i "${ssh_key}"
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
)

# Commands executed on each node, joined into a single-line "a; b; c; " string
# so it can be passed to ssh/pdsh as one quoted argument.
remote_steps=(
  "docker pull deepspeed/deepspeed:latest"
  # -f keeps re-runs from failing when the link already exists.
  "ln -sf workdir/DeepSpeed/azure/attach.sh attach.sh"
  # Stop here if the checkout is missing instead of running git in $HOME.
  "cd workdir/DeepSpeed || exit 1"
  "git pull"
  "git submodule update --init --recursive"
  "bash azure/start_container.sh"
)
update_script=$(printf '%s; ' "${remote_steps[@]}")

# Query Azure once and index into the result for every node, rather than
# calling the CLI once per node.
vm_ips_json=$(az vm list-ip-addresses -g "${rg}") \
  || die "az vm list-ip-addresses failed for resource group ${rg}"

hosts=()
for ((node_id = 0; node_id < num_vms; node_id++)); do
  addr=$(jq -r --argjson i "${node_id}" \
    '.[$i].virtualMachine.network.publicIpAddresses[0].ipAddress' \
    <<<"${vm_ips_json}")
  if [[ -z "${addr}" || "${addr}" == "null" ]]; then
    # Best effort: keep going with the nodes that do have an address.
    printf 'warning: no public IP found for node %s in %s, skipping\n' \
      "${node_id}" "${rg}" >&2
    continue
  fi
  hosts+=("${addr}")
done

if ((${#hosts[@]} == 0)); then
  die "no reachable nodes found in resource group ${rg}"
fi

if [[ "${parallel}" == true ]]; then
  echo "parallel docker pull"
  # Comma-separated host list for pdsh -w; the IFS change is confined to the
  # command substitution's subshell.
  host_list=$(
    IFS=,
    printf '%s' "${hosts[*]}"
  )
  PDSH_RCMD_TYPE=ssh PDSH_SSH_ARGS_APPEND="${ssh_args[*]}" \
    pdsh -w "${host_list}" -l "${username}" "${update_script}"
else
  echo "sequential docker pull"
  for ip_addr in "${hosts[@]}"; do
    ssh "${ssh_args[@]}" "${username}@${ip_addr}" "${update_script}"
  done
fi