# azure-pipelines.yml
  1. jobs:
  2. - job: DeepSpeed_Tests
  3. timeoutInMinutes: 360
  4. pool:
  5. name: 'DS_testing'
  6. strategy:
  7. matrix:
  8. PyTorch12-CUDA100:
  9. python.version: '3.6'
  10. cuda.version: '10.0'
  11. pytorch.version: '1.2'
  12. torchvision.version: '0.4.0'
  13. runmodeltests: true
  14. #PyTorch15-CUDA101:
  15. # python.version: '3.7'
  16. # cuda.version: '10.1'
  17. # pytorch.version: '1.5.0+cu101'
  18. # torchvision.version: '0.6.0+cu101'
  19. # runmodeltests: true
  20. ##PyTorch15-CUDA102:
  21. # python.version: '3.7'
  22. # cuda.version: '10.2'
  23. # pytorch.version: '1.5'
  24. # torchvision.version: '0.6.1'
  25. # runmodeltests: true
  26. variables:
  27. conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'
  28. steps:
  29. # Unfortunately nvidia's nvcc_linux-64=<version> seems to install 10.1 regardless?
  30. # Most of this complexity is a workaround to get the compiler toolchain to match the
  31. # cudatoolkit runtime
  32. - script: |
  33. conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
  34. source activate $(conda_env)
  35. conda install -q --yes conda
  36. conda install -q --yes pip
  37. conda install -q --yes gxx_linux-64
  38. if [[ $(cuda.version) != "10.2" ]]; then conda install --yes -c conda-forge cudatoolkit-dev=$(cuda.version) ; fi
  39. displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'
  40. # Manually install torch/torchvision first to enforce versioning.
  41. - script: |
  42. source activate $(conda_env)
  43. pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
  44. #-f https://download.pytorch.org/whl/torch_stable.html
  45. ./install.sh --local_only
  46. #python -I basic_install_test.py
  47. displayName: 'Install DeepSpeed'
  48. - script: |
  49. source activate $(conda_env)
  50. which python
  51. python --version
  52. which nvcc
  53. nvcc --version
  54. which deepspeed
  55. python -c "import torch; print('torch:', torch.__version__, torch)"
  56. python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
  57. python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
  58. displayName: 'Show environment'
  59. - script: |
  60. source activate $(conda_env)
  61. pytest --durations=0 --forked --verbose -x tests/unit/
  62. displayName: 'Unit tests'
  63. - script: |
  64. source activate $(conda_env)
  65. ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
  66. pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
  67. cd tests/model/
  68. rm -rf BingBertSquad/baseline
  69. rm -rf Megatron_GPT2/baseline
  70. pytest --durations=0 -s run_sanity_check.py
  71. condition: and(succeeded(), eq(variables['runmodeltests'], true))
  72. displayName: 'Model tests'
  73. #BingBertSquad logs
  74. - task: PublishPipelineArtifact@1
  75. inputs:
  76. targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
  77. artifactName: BingBertSquad_logs
  78. displayName: 'BingBertSquad log uploads'
  79. condition: eq(variables['runmodeltests'], true)
  80. - job: Code_Quality_Checks
  81. pool:
  82. name: 'DS_testing'
  83. variables:
  84. conda_env: 'ds_codetest'
  85. steps:
  86. - script: |
  87. conda create --force --yes -n $(conda_env) python=3.7
  88. source activate $(conda_env)
  89. displayName: 'Create code test environment'
  90. - script: |
  91. source activate $(conda_env)
  92. pip install pre-commit
  93. pre-commit run --all-files
  94. displayName: 'Formatting checks'
  95. - script: |
  96. source activate $(conda_env)
  97. pip install pylint
  98. pylint --exit-zero deepspeed/
  99. displayName: 'Code linter'