# CI pipeline with two jobs, both running on the 'DS_testing' pool:
#   * DeepSpeed_Tests     - creates a conda environment per matrix entry,
#                           installs DeepSpeed, then runs unit and model tests.
#   * Code_Quality_Checks - runs pre-commit formatting checks and pylint.
jobs:
  - job: DeepSpeed_Tests
    timeoutInMinutes: 360
    pool:
      name: 'DS_testing'

    # One job instance is generated per matrix entry. Only the
    # PyTorch 1.2 / CUDA 10.0 combination is enabled; the other entries are
    # kept commented out.
    strategy:
      matrix:
        PyTorch12-CUDA100:
          python.version: '3.6'
          cuda.version: '10.0'
          pytorch.version: '1.2'
          torchvision.version: '0.4.0'
          runmodeltests: true
        # PyTorch15-CUDA101:
        #   python.version: '3.7'
        #   cuda.version: '10.1'
        #   pytorch.version: '1.5.0+cu101'
        #   torchvision.version: '0.6.0+cu101'
        #   runmodeltests: true
        # PyTorch15-CUDA102:
        #   python.version: '3.7'
        #   cuda.version: '10.2'
        #   pytorch.version: '1.5'
        #   torchvision.version: '0.6.1'
        #   runmodeltests: true

    variables:
      # Environment name is derived from the matrix values, so each
      # combination gets its own conda environment.
      conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'

    steps:
      # Unfortunately, pinning nvidia's nvcc_linux-64 package
      # (nvcc_linux-64=<version>) seems to install 10.1 regardless.
      # Most of the complexity below is a workaround to get the compiler
      # toolchain to match the cudatoolkit runtime.
      - script: |
          conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
          source activate $(conda_env)
          conda install -q --yes conda
          conda install -q --yes pip
          conda install -q --yes gxx_linux-64
          if [[ $(cuda.version) != "10.2" ]]; then conda install --yes -c conda-forge cudatoolkit-dev=$(cuda.version) ; fi
        displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'

      # Manually install torch/torchvision first to enforce versioning,
      # then install DeepSpeed itself through install.sh.
      - script: |
          source activate $(conda_env)
          pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
          #-f https://download.pytorch.org/whl/torch_stable.html
          ./install.sh --local_only
          #python -I basic_install_test.py
        displayName: 'Install DeepSpeed'

      # Print interpreter, compiler, and library versions into the build log
      # so the resolved environment can be inspected after the fact.
      - script: |
          source activate $(conda_env)
          which python
          python --version
          which nvcc
          nvcc --version
          which deepspeed
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
          python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
        displayName: 'Show environment'

      # -x makes pytest stop at the first failing test.
      - script: |
          source activate $(conda_env)
          pytest --durations=0 --forked --verbose -x tests/unit/
        displayName: 'Unit tests'

      # Model tests: link the Megatron-LM data from /data into the examples
      # tree, install the example requirements, remove any previous baseline
      # directories, then run the sanity check. Gated on all earlier steps
      # succeeding and on the matrix entry setting runmodeltests.
      - script: |
          source activate $(conda_env)
          ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
          pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
          cd tests/model/
          rm -rf BingBertSquad/baseline
          rm -rf Megatron_GPT2/baseline
          pytest --durations=0 -s run_sanity_check.py
        displayName: 'Model tests'
        condition: and(succeeded(), eq(variables['runmodeltests'], true))

      # BingBertSquad logs
      # The condition has no succeeded() term, so the upload is gated only on
      # runmodeltests and not on the outcome of the earlier steps.
      - task: PublishPipelineArtifact@1
        inputs:
          targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
          artifactName: BingBertSquad_logs
        displayName: 'BingBertSquad log uploads'
        condition: eq(variables['runmodeltests'], true)

  - job: Code_Quality_Checks
    pool:
      name: 'DS_testing'

    variables:
      conda_env: 'ds_codetest'

    steps:
      # Recreate the Python 3.7 environment used by the checks below.
      - script: |
          conda create --force --yes -n $(conda_env) python=3.7
          source activate $(conda_env)
        displayName: 'Create code test environment'

      # Run every configured pre-commit hook against the whole repository.
      - script: |
          source activate $(conda_env)
          pip install pre-commit
          pre-commit run --all-files
        displayName: 'Formatting checks'

      # pylint is invoked with --exit-zero, so this step reports findings
      # without failing on them.
      - script: |
          source activate $(conda_env)
          pip install pylint
          pylint --exit-zero deepspeed/
        displayName: 'Code linter'