install.sh 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #!/bin/bash
  # Abort on the first unhandled failure. -E (errtrace) makes the ERR trap
  # installed below apply inside functions, command substitutions and
  # subshells too; without it a failure inside a function ends the script
  # without the report being printed.
  set -eE

  #######################################
  # Print the installer's failure report.
  # Arguments: $1 - line number on which the failing command ran
  # Outputs:   two diagnostic lines on stderr
  #######################################
  err_report() {
    echo "Error on line $1" >&2
    echo "Fail to install deepspeed" >&2
  }

  # Single quotes defer the expansion of $LINENO until the trap actually fires.
  trap 'err_report $LINENO' ERR
  #######################################
  # Print the installer's help text.
  # Arguments: none
  # Outputs:   help text on stdout, framed by one empty line before and after
  #######################################
  usage() {
    # One printf argument per output line. This replaces the Python-style
    # triple-quote form, which in shell is just empty strings concatenated
    # around a single double-quoted string.
    printf '%s\n' \
      "" \
      "Usage: install.sh [options...]" \
      "By default will install deepspeed and all third party dependencies across all machines listed in" \
      "hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally" \
      "[optional]" \
      "-l, --local_only Install only on local machine" \
      "-s, --pip_sudo Run pip install with sudo (default: no sudo)" \
      "-r, --allow_sudo Allow script to be run by root (probably don't want this, instead use --pip_sudo)" \
      "-n, --no_clean Do not clean prior build state, by default prior build files are removed before building wheels" \
      "-m, --pip_mirror Use the specified pip mirror (default: the default pip mirror)" \
      "-H, --hostfile Path to MPI-style hostfile (default: /job/hostfile)" \
      "-e, --examples Checkout deepspeed example submodule (no install)" \
      "-v, --verbose Verbose logging" \
      "-h, --help This help text" \
      ""
  }
  # ---- Option defaults; the command-line flags handled below override them ----

  # Where to install.
  local_only=0             # -l: 1 restricts the install to this machine
  hostfile=/job/hostfile   # -H: list of target machines

  # How pip is invoked.
  pip_sudo=0               # -s: 1 runs pip through sudo
  pip_mirror=""            # -m: alternative package index, empty for pip's default
  allow_sudo=0             # -r: 1 lets the script itself run as root

  # Build and miscellaneous behaviour.
  no_clean=0               # -n: 1 keeps earlier build output in place
  verbose=0                # -v: 1 enables verbose logging
  examples=0               # -e: 1 only checks out the examples submodule

  # Not read anywhere else in the visible part of this script; kept unchanged.
  ds_only=0
  entire_dlts_job=1
  skip_requirements=0
  #######################################
  # Abort with a usage error when an option that takes a value is the last
  # word on the command line.
  # Arguments: $1 - option as typed by the user
  #            $2 - number of arguments still unparsed, the option included
  # Outputs:   error message and help text on stderr
  # Exits:     1 when no value follows the option
  #######################################
  _require_option_value() {
    if [[ "$2" -lt 2 ]]; then
      echo "Option $1 requires an argument" >&2
      usage >&2
      exit 1
    fi
  }

  #######################################
  # Parse the installer's command-line flags into the global option variables.
  # Globals:   writes local_only, pip_sudo, pip_mirror, verbose, allow_sudo,
  #            no_clean, hostfile, examples
  # Arguments: the script's command-line arguments
  # Outputs:   help text on stdout for -h/--help; diagnostics on stderr
  # Exits:     0 after printing help; 1 on an unknown argument, on an option
  #            missing its value, or when the given hostfile does not exist
  #######################################
  parse_args() {
    local key
    while [[ $# -gt 0 ]]; do
      key="$1"
      case "$key" in
        -l|--local_only)
          local_only=1
          shift
          ;;
        -s|--pip_sudo)
          pip_sudo=1
          shift
          ;;
        -m|--pip_mirror)
          # Without this check the second shift below fails and the script
          # dies with only a generic "Error on line N" report.
          _require_option_value "$key" "$#"
          pip_mirror="$2"
          shift 2
          ;;
        -v|--verbose)
          verbose=1
          shift
          ;;
        -r|--allow_sudo)
          allow_sudo=1
          shift
          ;;
        -n|--no_clean)
          no_clean=1
          shift
          ;;
        -H|--hostfile)
          _require_option_value "$key" "$#"
          hostfile="$2"
          # Quoted so that paths containing spaces are tested as one word.
          if [[ ! -f "$hostfile" ]]; then
            echo "User-provided hostfile does not exist at $hostfile, exiting" >&2
            exit 1
          fi
          shift 2
          ;;
        -e|--examples)
          examples=1
          shift
          ;;
        -h|--help)
          usage
          exit 0
          ;;
        *)
          echo "Unknown argument(s): $key" >&2
          usage >&2
          exit 1
          ;;
      esac
    done
  }

  parse_args "$@"
  # Refuse to run as root unless the caller opted in with -r/--allow_sudo.
  user="$(whoami)"
  if [[ "$allow_sudo" == "0" && "$user" == "root" ]]; then
    echo "WARNING: running as root, if you want to install DeepSpeed with sudo please use -s/--pip_sudo instead"
    usage
    exit 1
  fi

  # -e/--examples: only fetch the git submodules, then stop without installing.
  case "$examples" in
    1)
      git submodule update --init --recursive
      exit 0
      ;;
  esac
  # Translate the verbose switch into the flags handed to the tools run later.
  # Start from the quiet settings and override them only when verbose is on.
  VERBOSE=""
  PIP_VERBOSE="--disable-pip-version-check"
  if [[ "$verbose" == "1" ]]; then
    VERBOSE="-v"
    PIP_VERBOSE=""
  fi
  #######################################
  # Remove a file or directory tree if it exists; do nothing otherwise.
  # Globals:   VERBOSE (read) - optional extra flag passed to rm
  # Arguments: $1 - path to remove
  # Outputs:   "Attempting to remove <path>" on stdout, plus rm's own output
  # Returns:   0 when the path is absent or was removed; rm's status otherwise
  #######################################
  rm_if_exist() {
    local target="$1"
    echo "Attempting to remove $target"
    # The path is quoted so names with spaces or glob characters stay one word,
    # and "--" keeps a leading "-" from being parsed as an rm option.
    # VERBOSE stays unquoted on purpose: when empty it must expand to nothing
    # rather than to an empty argument.
    if [[ -f "$target" ]]; then
      rm ${VERBOSE:-} -- "$target"
    elif [[ -d "$target" ]]; then
      rm -r ${VERBOSE:-} -- "$target"
    fi
  }
  # Unless -n/--no_clean was given, remove deepspeed build files left behind
  # by an earlier build.
  if [[ "$no_clean" == "0" ]]; then
    for stale_artifact in \
      deepspeed/git_version_info_installed.py \
      dist \
      build \
      deepspeed.egg-info; do
      rm_if_exist "$stale_artifact"
    done
  fi
  # Prefix placed in front of pip commands: "sudo -H" with -s/--pip_sudo,
  # otherwise empty.
  PIP_SUDO=""
  if [[ "$pip_sudo" == "1" ]]; then
    PIP_SUDO="sudo -H"
  fi

  # Base pip command line; the index option is appended only when a mirror
  # was requested with -m/--pip_mirror.
  PIP_INSTALL="pip install $VERBOSE $PIP_VERBOSE"
  if [[ -n "$pip_mirror" ]]; then
    PIP_INSTALL="$PIP_INSTALL -i $pip_mirror"
  fi
  # Fall back to a local-only install when the hostfile is missing.
  # The path is quoted: unquoted, an empty value makes the test evaluate to
  # false and a path containing spaces makes it error out, and in both cases
  # the fallback would be skipped.
  if [[ ! -f "$hostfile" ]]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
  fi
  # Build the wheel once on this machine, then install it either locally or on
  # every host listed in the hostfile.
  echo "Building deepspeed wheel"
  # VERBOSE is deliberately unquoted: when empty it must disappear instead of
  # being passed to setup.py as an empty argument.
  python setup.py $VERBOSE bdist_wheel

  if [[ "$local_only" == "1" ]]; then
    echo "Installing deepspeed"
    # $PIP_SUDO pip uninstall -y deepspeed
    # PIP_SUDO and PIP_INSTALL each hold several words of a command line, so
    # they rely on word splitting and must stay unquoted.
    $PIP_SUDO $PIP_INSTALL dist/deepspeed*.whl
    ds_report
  else
    if [[ ! -f "$hostfile" ]]; then
      echo "hostfile not found, cannot proceed" >&2
      exit 1
    fi

    # Take the first field of every hostfile line and join them with commas for
    # pdsh/pdcp -w. Blank lines and lines starting with '#' are skipped so they
    # cannot produce empty or bogus entries in the host list.
    hosts="$(awk 'NF && $1 !~ /^#/ {print $1}' "$hostfile" | paste -sd "," -)"
    if [[ -z "$hosts" ]]; then
      echo "No hosts found in $hostfile, cannot proceed" >&2
      exit 1
    fi

    export PDSH_RCMD_TYPE=ssh
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # Prepare the staging directory on every host: empty it when it already
    # exists (rm -f so that an empty directory is not reported as an error),
    # create it otherwise.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*; else mkdir -pv $tmp_wheel_path; fi"
    pdcp -w "$hosts" requirements/requirements.txt "${tmp_wheel_path}/"

    echo "Installing deepspeed"
    pdsh -w "$hosts" "$PIP_SUDO pip uninstall -y deepspeed"
    pdcp -w "$hosts" dist/deepspeed*.whl "$tmp_wheel_path/"
    pdsh -w "$hosts" "$PIP_SUDO $PIP_INSTALL $tmp_wheel_path/deepspeed*.whl"
    pdsh -w "$hosts" "ds_report"

    # Remove only the files this script copied over; rmdir then succeeds only
    # when nothing else is left in the staging directory.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; rm -f $tmp_wheel_path/*.txt; rmdir $tmp_wheel_path; fi"
  fi