# openoker / ray
							group: rllib tests
depends_on:
  - forge
steps:
  # builds
  # Non-GPU rllib image (RAYCI_IS_GPU_BUILD is the string "false"), built from
  # the wanda spec named below. IMAGE_FROM / IMAGE_TO presumably name the base
  # and the resulting image — confirm against the wanda spec.
  - name: rllibbuild
    depends_on: oss-ci-base_ml
    wanda: ci/docker/rllib.build.wanda.yaml
    env:
      # Quoted on purpose: the value is a string, not a YAML boolean.
      RAYCI_IS_GPU_BUILD: 'false'
      IMAGE_TO: rllibbuild
      IMAGE_FROM: cr.ray.io/rayproject/oss-ci-base_ml

  # GPU rllib image (RAYCI_IS_GPU_BUILD is the string "true"), built from the
  # same wanda spec as rllibbuild but on top of oss-ci-base_gpu. IMAGE_FROM /
  # IMAGE_TO presumably name the base and the resulting image — confirm
  # against the wanda spec.
  - name: rllibgpubuild
    depends_on: oss-ci-base_gpu
    wanda: ci/docker/rllib.build.wanda.yaml
    env:
      # Quoted on purpose: the value is a string, not a YAML boolean.
      RAYCI_IS_GPU_BUILD: 'true'
      IMAGE_TO: rllibgpubuild
      IMAGE_FROM: cr.ray.io/rayproject/oss-ci-base_gpu

  # tests
  # Runs //rllib/... with the tag groups listed in --except-tags left out.
  # Declared with parallelism 4 on `large` instances.
  - label: ':brain: rllib: algorithm, model and others'
    tags: rllib_directly
    instance_type: large
    parallelism: 4
    depends_on: rllibbuild
    commands:
      # --workers / --worker-id receive the Buildkite parallel job count and
      # job index. `$$` presumably defers expansion to job runtime — confirm
      # against Buildkite's variable interpolation rules.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --parallelism-per-worker 3
        --except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1

  # Learning tests with --framework=torch, in two passes over //rllib/...:
  # first the regular learning-test tags, then the tests tagged
  # learning_tests_pytorch_use_all_core (that pass sets no
  # --parallelism-per-worker).
  - label: ':brain: rllib: learning tests pytorch'
    tags: rllib
    instance_type: large
    parallelism: 5
    depends_on: rllibbuild
    commands:
      # --workers / --worker-id receive the Buildkite parallel job count and
      # job index. `$$` presumably defers expansion to job runtime.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --parallelism-per-worker 3
        --only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous
        --except-tags tf_only,tf2_only,gpu,multi_gpu,learning_tests_pytorch_use_all_core
        --test-arg --framework=torch
      # Second pass: only the *_use_all_core tests, with
      # --skip-ray-installation.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --only-tags learning_tests_pytorch_use_all_core
        --except-tags tf_only,tf2_only,gpu,multi_gpu
        --test-arg --framework=torch
        --skip-ray-installation

  # Example tests in two passes over //rllib/...: tests tagged `examples`
  # (minus examples_use_all_core), then the examples_use_all_core tests,
  # which are run without --parallelism-per-worker.
  - label: ':brain: rllib: examples'
    tags: rllib
    instance_type: large
    parallelism: 5
    depends_on: rllibbuild
    commands:
      # --workers / --worker-id receive the Buildkite parallel job count and
      # job index. `$$` presumably defers expansion to job runtime.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --parallelism-per-worker 2
        --only-tags examples
        --except-tags multi_gpu,gpu,examples_use_all_core
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
      # Second pass: only the *_use_all_core examples, with
      # --skip-ray-installation.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --only-tags examples_use_all_core
        --skip-ray-installation
        --except-tags multi_gpu,gpu
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1

  # Runs the //rllib/... tests tagged `tests_dir`, leaving out those tagged
  # multi_gpu or manual. Declared with parallelism 2.
  - label: ':brain: rllib: tests dir'
    tags: rllib_directly
    instance_type: large
    parallelism: 2
    depends_on: rllibbuild
    commands:
      # --workers / --worker-id receive the Buildkite parallel job count and
      # job index. `$$` presumably defers expansion to job runtime.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --parallelism-per-worker 3
        --only-tags tests_dir
        --except-tags multi_gpu,manual
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1

  # Runs the //rllib/... tests tagged `gpu` on `gpu` instances, using the
  # rllibgpubuild image (--build-name) and RLLIB_NUM_GPUS=1 in the test env.
  - label: ':brain: rllib: gpu tests'
    tags:
      - rllib_gpu
      - gpu
    instance_type: gpu
    parallelism: 5
    depends_on: rllibgpubuild
    commands:
      # --workers / --worker-id receive the Buildkite parallel job count and
      # job index. `$$` presumably defers expansion to job runtime.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --build-name rllibgpubuild
        --only-tags gpu
        --test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
        --test-env=RLLIB_NUM_GPUS=1

  # Ray Data related rllib tests. The `if` condition restricts this step to
  # builds whose branch is not "master".
  - label: ':brain: rllib: data tests'
    if: 'build.branch != "master"'
    tags:
      - data
      - rllib
    instance_type: large
    depends_on: rllibbuild
    commands:
      # learning tests pytorch
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --parallelism-per-worker 3
        --only-tags learning_tests_with_ray_data
        --except-tags multi_gpu,gpu,tf_only,tf2_only
        --test-arg --framework=torch
      # rllib unittests
      # --skip-ray-installation: reuse the same docker image as the previous
      # run.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --parallelism-per-worker 3
        --only-tags ray_data
        --except-tags learning_tests_with_ray_data,multi_gpu,gpu
        --skip-ray-installation

  # Runs only the //rllib/... tests tagged torch_2.x_only_benchmark, on a
  # `medium` instance.
  - label: ':brain: rllib: benchmarks'
    tags: rllib
    instance_type: medium
    depends_on: rllibbuild
    commands:
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --only-tags torch_2.x_only_benchmark

  # Runs the //rllib/... tests tagged memory_leak_tests with --framework=torch,
  # leaving out anything tagged `flaky`.
  - label: ':brain: rllib: memory leak pytorch tests'
    tags: rllib
    instance_type: medium
    depends_on: rllibbuild
    commands:
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --only-tags memory_leak_tests
        --except-tags flaky
        --test-arg --framework=torch

  # Documentation-related tests, three commands in sequence: doctests over
  # python/ray/... and //doc/..., the remaining //doc/... targets, and the
  # //rllib/... tests tagged `documentation`. The last two pass
  # --skip-ray-installation.
  - label: ':brain: rllib: doc tests'
    tags:
      - rllib_directly
      - doc
    instance_type: medium
    depends_on: rllibbuild
    commands:
      # doc tests
      - >-
        bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... rllib
        --except-tags gpu
        --only-tags doctest
        --parallelism-per-worker 2
      # doc examples
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //doc/... rllib
        --except-tags gpu,post_wheel_build,timeseries_libs,doctest
        --parallelism-per-worker 2
        --skip-ray-installation
      # rllib tests tagged `documentation`
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --only-tags documentation
        --parallelism-per-worker 2
        --skip-ray-installation

  # Runs the //rllib/... tests tagged multi_gpu on `gpu-large` instances with
  # --gpus 4, using the rllibgpubuild image (--build-name).
  - label: ':brain: rllib: multi-gpu tests'
    tags:
      - rllib_gpu
      - gpu
    instance_type: gpu-large
    parallelism: 5
    depends_on: rllibgpubuild
    commands:
      # --workers / --worker-id receive the Buildkite parallel job count and
      # job index. `$$` presumably defers expansion to job runtime.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
        --worker-id "$${BUILDKITE_PARALLEL_JOB}"
        --parallelism-per-worker 2
        --gpus 4
        --build-name rllibgpubuild
        --only-tags multi_gpu

  # Flaky counterpart of the multi-gpu step: same tag filter and --gpus 4, run
  # with --run-flaky-tests and without sharding. soft_fail is set, so a
  # failure here is presumably non-blocking for the build.
  - label: ':brain: rllib: flaky multi-gpu tests'
    key: rllib_flaky_multi_gpu_tests
    tags:
      - rllib_gpu
      - gpu
      - skip-on-premerge
    instance_type: gpu-large
    depends_on: rllibgpubuild
    soft_fail: true
    commands:
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --parallelism-per-worker 2
        --gpus 4
        --build-name rllibgpubuild
        --only-tags multi_gpu

  # Flaky counterpart of the gpu step: same tag filter and test env, run with
  # --run-flaky-tests and without sharding. soft_fail is set, so a failure
  # here is presumably non-blocking for the build.
  - label: ':brain: rllib: flaky gpu tests'
    key: rllib_flaky_gpu_tests
    tags:
      - rllib_gpu
      - gpu
      - skip-on-premerge
    instance_type: gpu
    depends_on: rllibgpubuild
    soft_fail: true
    commands:
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --build-name rllibgpubuild
        --only-tags gpu
        --test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
        --test-env=RLLIB_NUM_GPUS=1

  # Flaky learning tests, one command per framework (torch, tf, tf2), each run
  # with --run-flaky-tests. soft_fail is set, so a failure here is presumably
  # non-blocking for the build.
  - label: ':brain: rllib: flaky tests (learning tests)'
    key: rllib_flaky_tests_01
    tags:
      - rllib
      - skip-on-premerge
    instance_type: large
    depends_on: rllibbuild
    soft_fail: true
    commands:
      # torch
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --only-tags fake_gpus,learning_tests_discrete,learning_tests_with_ray_data,crashing_cartpole,stateless_cartpole,learning_tests_continuous
        --except-tags tf_only,tf2_only,multi_gpu,gpu
        --test-arg --framework=torch
      # tf2-static-graph
      # --skip-ray-installation: reuse the same docker image as the previous
      # run.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --only-tags tf_only
        --except-tags torch_only,tf2_only,no_tf_static_graph,multi_gpu,gpu
        --test-arg --framework=tf
        --skip-ray-installation
      # tf2-eager-tracing
      # --skip-ray-installation: reuse the same docker image as the previous
      # run.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --only-tags tf2_only
        --except-tags fake_gpus,torch_only,multi_gpu,no_tf_eager_tracing,gpu
        --test-arg --framework=tf2
        --skip-ray-installation

  # Flaky examples, rlmodule, algorithm/model and tests_dir tests: four
  # commands, each run with --run-flaky-tests. Every command after the first
  # passes --skip-ray-installation. soft_fail is set, so a failure here is
  # presumably non-blocking for the build.
  - label: ':brain: rllib: flaky tests (examples/rlmodule/models/tests_dir)'
    key: rllib_flaky_tests_02
    tags:
      - rllib
      - skip-on-premerge
    instance_type: large
    depends_on: rllibbuild
    soft_fail: true
    commands:
      # examples
      # (the doubled space after `rllib` is part of the command string and is
      # kept as-is)
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests
        --parallelism-per-worker 3
        --only-tags examples
        --except-tags multi_gpu,gpu
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
      # rlmodule tests
      # --skip-ray-installation: reuse the same docker image as the previous
      # run.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --parallelism-per-worker 3
        --only-tags rlm
        --except-tags multi_gpu,gpu
        --test-env RLLIB_ENABLE_RL_MODULE=1
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
        --skip-ray-installation
      # algorithm, models
      # (the doubled space after `rllib` is part of the command string and is
      # kept as-is)
      # --skip-ray-installation: reuse the same docker image as the previous
      # run.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests
        --parallelism-per-worker 3
        --except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,gpu,no_cpu,torch_2.x_only_benchmark,manual
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
        --skip-ray-installation
      # tests/ dir
      # --skip-ray-installation: reuse the same docker image as the previous
      # run.
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --parallelism-per-worker 3
        --only-tags tests_dir
        --except-tags multi_gpu,gpu,manual
        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
        --skip-ray-installation

  # Flaky memory-leak tests: //rllib/... tests tagged memory_leak_tests, run
  # with --run-flaky-tests. soft_fail is set, so a failure here is presumably
  # non-blocking for the build.
  # NOTE(review): this step passes --framework=tf2, while the non-flaky
  # "memory leak pytorch tests" step passes --framework=torch — confirm that
  # the difference is intended.
  - label: ':brain: rllib: flaky tests (memory leak)'
    key: rllib_flaky_tests_03
    tags:
      - rllib
      - skip-on-premerge
    instance_type: medium
    depends_on: rllibbuild
    soft_fail: true
    commands:
      - >-
        bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
        --run-flaky-tests
        --only-tags memory_leak_tests
        --except-tags flaky,gpu,multi_gpu
        --test-arg --framework=tf2