install.sh 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #!/bin/bash
  2. set -e
  3. err_report() {
  4. echo "Error on line $1"
  5. echo "Fail to install deepspeed"
  6. }
  7. trap 'err_report $LINENO' ERR
  8. usage() {
  9. echo """
  10. Usage: install.sh [options...]
  11. By default will install deepspeed and all third party dependecies accross all machines listed in
  12. hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally
  13. [optional]
  14. -l, --local_only Install only on local machine
  15. -s, --pip_sudo Run pip install with sudo (default: no sudo)
  16. -r, --allow_sudo Allow script to be run by root (probably don't want this, instead use --pip_sudo)
  17. -n, --no_clean Do not clean prior build state, by default prior build files are removed before building wheels
  18. -m, --pip_mirror Use the specified pip mirror (default: the default pip mirror)
  19. -H, --hostfile Path to MPI-style hostfile (default: /job/hostfile)
  20. -v, --verbose Verbose logging
  21. -h, --help This help text
  22. """
  23. }
  24. ds_only=0
  25. tp_only=0
  26. deepspeed_install=1
  27. third_party_install=1
  28. local_only=0
  29. pip_sudo=0
  30. entire_dlts_job=1
  31. hostfile=/job/hostfile
  32. pip_mirror=""
  33. apex_commit=""
  34. skip_requirements=0
  35. allow_sudo=0
  36. no_clean=0
  37. verbose=0
  38. while [[ $# -gt 0 ]]
  39. do
  40. key="$1"
  41. case $key in
  42. -s|--pip_sudo)
  43. pip_sudo=1;
  44. shift
  45. ;;
  46. -m|--pip_mirror)
  47. pip_mirror=$2;
  48. shift
  49. shift
  50. ;;
  51. -v|--verbose)
  52. verbose=1;
  53. shift
  54. ;;
  55. -r|--allow_sudo)
  56. allow_sudo=1;
  57. shift
  58. ;;
  59. -n|--no_clean)
  60. no_clean=1;
  61. shift
  62. ;;
  63. -H|--hostfile)
  64. hostfile=$2
  65. if [ ! -f $2 ]; then
  66. echo "User provided hostfile does not exist at $hostfile, exiting"
  67. exit 1
  68. fi
  69. shift
  70. shift
  71. ;;
  72. -h|--help)
  73. usage
  74. exit 0
  75. ;;
  76. *)
  77. echo "Unkown argument(s)"
  78. usage
  79. exit 1
  80. shift
  81. ;;
  82. esac
  83. done
  84. user=`whoami`
  85. if [ "$allow_sudo" == "0" ]; then
  86. if [ "$user" == "root" ]; then
  87. echo "WARNING: running as root, if you want to install DeepSpeed with sudo please use -s/--pip_sudo instead"
  88. usage
  89. exit 1
  90. fi
  91. fi
  92. if [ "$ds_only" == "1" ] && [ "$tp_only" == "1" ]; then
  93. echo "-d and -t are mutually exclusive, only choose one or none"
  94. usage
  95. exit 1
  96. fi
  97. if [ "$verbose" == "1" ]; then
  98. VERBOSE="-v"
  99. else
  100. VERBOSE=""
  101. fi
  102. rm_if_exist() {
  103. echo "Attempting to remove $1"
  104. if [ -f $1 ]; then
  105. rm $VERBOSE $1
  106. elif [ -d $1 ]; then
  107. rm -r $VERBOSE $1
  108. fi
  109. }
  110. if [ "$no_clean" == "0" ]; then
  111. # remove deepspeed build files
  112. rm_if_exist deepspeed/git_version_info_installed.py
  113. rm_if_exist dist
  114. rm_if_exist build
  115. rm_if_exist deepspeed.egg-info
  116. fi
  117. if [ "$pip_sudo" == "1" ]; then
  118. PIP_SUDO="sudo -H"
  119. else
  120. PIP_SUDO=""
  121. fi
  122. if [ "$pip_mirror" != "" ]; then
  123. PIP_INSTALL="pip install $VERBOSE -i $pip_mirror"
  124. else
  125. PIP_INSTALL="pip install $VERBOSE"
  126. fi
  127. if [ ! -f $hostfile ]; then
  128. echo "No hostfile exists at $hostfile, installing locally"
  129. local_only=1
  130. fi
  131. echo "Building deepspeed wheel"
  132. python setup.py $VERBOSE bdist_wheel
  133. if [ "$local_only" == "1" ]; then
  134. echo "Installing deepspeed"
  135. $PIP_SUDO pip uninstall -y deepspeed
  136. $PIP_SUDO $PIP_INSTALL dist/deepspeed*.whl
  137. ds_report
  138. else
  139. local_path=`pwd`
  140. if [ -f $hostfile ]; then
  141. hosts=`cat $hostfile | awk '{print $1}' | paste -sd "," -`;
  142. else
  143. echo "hostfile not found, cannot proceed"
  144. exit 1
  145. fi
  146. export PDSH_RCMD_TYPE=ssh
  147. tmp_wheel_path="/tmp/deepspeed_wheels"
  148. pdsh -w $hosts "if [ -d $tmp_wheel_path ]; then rm $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
  149. pdcp -w $hosts requirements/requirements.txt ${tmp_wheel_path}/
  150. echo "Installing deepspeed"
  151. pdsh -w $hosts "$PIP_SUDO pip uninstall -y deepspeed"
  152. pdcp -w $hosts dist/deepspeed*.whl $tmp_wheel_path/
  153. pdsh -w $hosts "$PIP_SUDO $PIP_INSTALL $tmp_wheel_path/deepspeed*.whl"
  154. pdsh -w $hosts "ds_report"
  155. pdsh -w $hosts "if [ -d $tmp_wheel_path ]; then rm $tmp_wheel_path/*.whl; rmdir $tmp_wheel_path; fi"
  156. fi