# .github/workflows/nv-torch-latest-v100.yml
  1. name: nv-torch-latest-v100
  2. on:
  3. pull_request:
  4. paths-ignore:
  5. - 'docs/**'
  6. - 'blogs/**'
  7. merge_group:
  8. branches: [ master ]
  9. schedule:
  10. - cron: "0 0 * * *"
  11. concurrency:
  12. group: ${{ github.workflow }}-${{ github.ref }}
  13. cancel-in-progress: true
  14. jobs:
  15. unit-tests:
  16. runs-on: [self-hosted, nvidia, cu116, v100]
  17. steps:
  18. - uses: actions/checkout@v3
  19. - id: setup-venv
  20. uses: ./.github/workflows/setup-venv
  21. - name: Install pytorch
  22. run: |
  23. pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu118
  24. python -c "import torch; print('torch:', torch.__version__, torch)"
  25. python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
  26. - name: Install transformers
  27. run: |
  28. git clone https://github.com/huggingface/transformers
  29. cd transformers
  30. # if needed switch to the last known good SHA until transformers@master is fixed
  31. # git checkout 1cc453d33
  32. git rev-parse --short HEAD
  33. pip install .
  34. - name: Install deepspeed
  35. run: |
  36. pip install .[dev,1bit,autotuning]
  37. ds_report
  38. - name: Python environment
  39. run: |
  40. pip list
  41. - name: Unit tests
  42. run: |
  43. unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
  44. cd tests
  45. coverage run --concurrency=multiprocessing -m pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="2.1" --cuda_ver="11.8"
  46. coverage run --concurrency=multiprocessing -m pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.1" --cuda_ver="11.8"
  47. - name: Coverage report
  48. run: |
  49. cd tests
  50. coverage combine
  51. coverage report -m