# nv-accelerate-v100.yml
#
# Clones Hugging Face Accelerate and runs its `tests/deepspeed` suite against
# the DeepSpeed sources in this checkout, on a self-hosted NVIDIA V100 runner.

name: nv-accelerate-v100

on:
  pull_request:
    # Pull requests that only touch these paths do not trigger this workflow.
    paths-ignore:
      - 'docs/**'
      - 'blogs/**'
      - 'deepspeed/inference/v2/**'
      - 'tests/unit/inference/v2/**'
  merge_group:
    branches: [ master ]
  schedule:
    # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: [self-hosted, nvidia, cu116, v100]

    steps:
      - uses: actions/checkout@v3

      # Local action from this repository; its contents are not visible here.
      # NOTE(review): presumably creates/activates the Python virtualenv used
      # by the steps below - confirm against .github/workflows/setup-venv.
      - id: setup-venv
        uses: ./.github/workflows/setup-venv

      - name: Install pytorch
        # NOTE(review): TORCH_CACHE is not defined in this file; it is assumed
        # to come from the runner environment or the setup-venv action.
        run: |
          pip install -U --cache-dir $TORCH_CACHE torch --index-url https://download.pytorch.org/whl/cu118
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      - name: Install deepspeed
        # The extras spec is quoted so the shell never treats `[...]` as a
        # glob character class.
        run: |
          pip install ".[dev,autotuning]"
          ds_report

      - name: Python environment
        run: |
          pip list

      - name: HF Accelerate tests
        # NOTE(review): PYTEST_OPTS is not defined in this file; it is assumed
        # to come from the runner environment and is left unquoted so that it
        # can expand to several pytest arguments.
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          git clone https://github.com/huggingface/accelerate
          cd accelerate
          git rev-parse --short HEAD
          # installing dependencies
          pip install ".[testing]"
          # force protobuf version due to issues
          pip install "protobuf<4.21.0"
          # tmp fix: force newer datasets version
          #pip install "datasets>=2.0.0"
          pip list
          pytest $PYTEST_OPTS --color=yes --durations=0 --verbose tests/deepspeed
  46. #pip install "datasets>=2.0.0"
  47. pip list
  48. pytest $PYTEST_OPTS --color=yes --durations=0 --verbose tests/deepspeed