#!/bin/bash
# run_tests.sh
#
# Invokes run_BingBertSquad.sh for several GPU counts, with and without a
# DeepSpeed config, in fp16 and fp32.
#
# Environment:
#   BingBertSquad_DIR - optional; a default relative path is tried when unset.

# Directory targeted by the optional nohup/redirect command variants.
# -p makes this a no-op when the directory already exists.
mkdir -p logs
  #######################################
  # Confirm that a required file exists; abort the script otherwise.
  # Arguments:
  #   $1 - path of the file to check
  #   $2 - label printed alongside the path on success
  # Outputs:
  #   "Using <label>: <path>" on stdout when the file exists;
  #   "<path> not found" on stderr when it does not.
  # Returns:
  #   0 when the file exists; otherwise exits the calling shell with status 1.
  #######################################
  validate_file() {
    # Locals, so same-named variables in the caller are not clobbered.
    local file=$1
    local file_name=$2
    if [[ -f "${file}" ]]; then
      echo "Using ${file_name}: ${file}"
    else
      echo "${file} not found" >&2
      exit 1
    fi
  }
  #######################################
  # Confirm that a required directory exists; abort the script otherwise.
  # Arguments:
  #   $1 - path of the directory to check
  #   $2 - label printed alongside the path on success
  # Outputs:
  #   "Using <label>: <path>" on stdout when the directory exists;
  #   "<path> folder not found" on stderr when it does not.
  # Returns:
  #   0 when the directory exists; otherwise exits the calling shell with
  #   status 1.
  #######################################
  validate_folder() {
    # Locals, so same-named variables in the caller are not clobbered.
    local dir=$1
    local dir_name=$2
    if [[ -d "${dir}" ]]; then
      echo "Using ${dir_name}: ${dir}"
    else
      echo "${dir} folder not found" >&2
      exit 1
    fi
  }
  # Validate path to BingBertSquad script.
  # The "+x" expansion distinguishes unset from set-but-empty: the default is
  # applied only when the variable is not set at all.
  if [[ -z "${BingBertSquad_DIR+x}" ]]; then
    export BingBertSquad_DIR=../../../DeepSpeedExamples/BingBertSquad
    echo "BingBertSquad_DIR environment variable not set; trying default: ${BingBertSquad_DIR}"
  fi
  # Arguments are quoted so an empty or space-containing path stays in the
  # first position instead of shifting the label into the path slot.
  validate_folder "${BingBertSquad_DIR}" "BingBertSquad_DIR"

  # DeepSpeed configuration files, resolved relative to the current directory.
  fp16_config_json=deepspeed_bsz24_fp16_config.json
  validate_file "${fp16_config_json}" "fp16_config_json"

  fp32_config_json=deepspeed_bsz24_fp32_config.json
  validate_file "${fp32_config_json}" "fp32_config_json"

  # Banner marking the start of the whole run.
  start_time=$(date +"%D %T")
  echo "---------------begin @ ${start_time}--------------"
  #######################################
  # Print a begin banner, echo and run one command, then print a finish banner.
  # Arguments:
  #   $1 - the full command line as a single string; it is passed to eval so
  #        that redirections and a trailing '&' inside the string take effect.
  # Outputs:
  #   Banners and the command text on stdout; a warning on stderr when the
  #   command exits non-zero.
  # Returns:
  #   The exit status of the evaluated command.
  #######################################
  run_timed() {
    local cmd=$1
    local status=0
    echo "---------------begin @ $(date +"%D %T")--------------"
    printf '%s\n' "${cmd}"
    # Quoted so the string reaches eval exactly as written (no word splitting
    # or glob expansion beforehand).
    eval "${cmd}" || status=$?
    echo "---------------finish @ $(date +"%D %T") --------------"
    if (( status != 0 )); then
      echo "command exited with status ${status}: ${cmd}" >&2
    fi
    return "${status}"
  }

  # Note: you may play around with the commented parts below (the extra
  # num_gpus values and the nohup commands) for simultaneous runs; just make
  # sure your hardware allocation can support it.
  for num_gpus in 8 1; do # 4 2
    # DeepSpeed, fp16
    #run_timed "nohup bash run_BingBertSquad.sh -g ${num_gpus} -d --deepspeed_config ${fp16_config_json} --fp16 > logs/deepspeed_fp16_${num_gpus}_$(date +%Y%m%d%H%M%S).out 2> logs/deepspeed_fp16_${num_gpus}_$(date +%Y%m%d%H%M%S).err &"
    run_timed "bash run_BingBertSquad.sh -g ${num_gpus} -d --deepspeed_config ${fp16_config_json} --fp16"

    # DeepSpeed, fp32
    #run_timed "nohup bash run_BingBertSquad.sh -g ${num_gpus} -d --deepspeed_config ${fp32_config_json} > logs/deepspeed_fp32_${num_gpus}_$(date +%Y%m%d%H%M%S).out 2> logs/deepspeed_fp32_${num_gpus}_$(date +%Y%m%d%H%M%S).err &"
    run_timed "bash run_BingBertSquad.sh -g ${num_gpus} -d --deepspeed_config ${fp32_config_json}"

    # Baseline, fp16
    #run_timed "nohup bash run_BingBertSquad.sh -g ${num_gpus} --fp16 > logs/baseline_fp16_${num_gpus}_$(date +%Y%m%d%H%M%S).out 2> logs/baseline_fp16_${num_gpus}_$(date +%Y%m%d%H%M%S).err &"
    run_timed "bash run_BingBertSquad.sh -g ${num_gpus} --fp16"

    # Baseline, fp32
    #run_timed "nohup bash run_BingBertSquad.sh -g ${num_gpus} > logs/baseline_fp32_${num_gpus}_$(date +%Y%m%d%H%M%S).out 2> logs/baseline_fp32_${num_gpus}_$(date +%Y%m%d%H%M%S).err &"
    run_timed "bash run_BingBertSquad.sh -g ${num_gpus}"
  done

  # Banner marking the end of the whole run.
  end_time=$(date +"%D %T")
  echo "---------------finish @ ${end_time} --------------"
  # NOTE(review): no matching `set -x` is visible in this file; this only has
  # an effect if tracing was enabled by whoever invoked or sourced the script.
  set +x