# nv-accelerate-v100.yml
#
# CI workflow that runs the Hugging Face Accelerate DeepSpeed integration
# tests against the DeepSpeed sources in this checkout, on a self-hosted
# runner labelled nvidia / cu111 / v100.

name: nv-accelerate-v100

# Run on pushes to master and staging* branches and on every pull request,
# except when the change only touches files under docs/.
on:
  push:
    branches:
      - 'master'
      - 'staging**'
    paths-ignore:
      - 'docs/**'
  pull_request:
    paths-ignore:
      - 'docs/**'

# One active run per workflow + ref: a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: [self-hosted, nvidia, cu111, v100]

    steps:
      - uses: actions/checkout@v2

      # Print GPU / Python / CUDA toolchain details, then pin torch and
      # torchvision to the cu111 builds and confirm that torch imports.
      - name: environment
        run: |
          nvidia-smi
          which python
          python --version
          which nvcc
          nvcc --version
          pip install --upgrade pip
          pip uninstall --yes torch torchvision
          pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Record the installed package set in the job log.
      - name: Python environment
        run: |
          pip list

      # Replace any previously installed deepspeed with the package in the
      # current directory. The extras spec is quoted so the shell cannot
      # glob-expand the [...] part.
      - name: Install deepspeed
        run: |
          pip uninstall --yes deepspeed
          pip install ".[dev,autotuning]"
          ds_report

      # Clone accelerate, install its test dependencies and run its
      # tests/deepspeed suite. TORCH_EXTENSIONS_DIR is set to a local
      # directory that is removed at the start of the step if it exists.
      - name: HF Accelerate tests
        run: |
          if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
          git clone https://github.com/huggingface/accelerate
          cd accelerate
          # installing dependencies
          pip install ".[testing]"
          # force protobuf version due to issues
          pip install "protobuf<4.21.0"
          # tmp fix: force newer datasets version
          pip install "datasets>=2.0.0"
          pip list
          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose tests/deepspeed