jobs_check_cuda_available.py

  1. """Job Submission CUDA available test
  2. Checks that GPU resources are available in the job submission
  3. driver script.
  4. This file is a driver script to be submitted to a Ray cluster via
  5. the Ray Jobs API. This is done by specifying `type: job` in
  6. `release_tests.yaml` (as opposed to, say, `type: sdk_command`).
  7. Release test for https://github.com/ray-project/ray/issues/24455
  8. Test owner: architkulkarni
  9. """
import ray
import torch

from ray._private.test_utils import wait_for_condition

ray.init()

# Assert that GPU resources are available in the driver script.
assert torch.cuda.is_available(), "CUDA is not available in the driver script"


# For good measure, let's also check that we can use the GPU
# in a remote function.
@ray.remote(num_gpus=0.1)
def f():
    return ray.get_gpu_ids()


assert ray.get(f.remote()) == [0]

# Also check that non-GPU tasks can be scheduled across all nodes.
NUM_NODES = 2


@ray.remote(num_cpus=1, scheduling_strategy="SPREAD")
def get_node_id():
    return ray.get_runtime_context().get_node_id()


node_ids = set(ray.get([get_node_id.remote() for _ in range(100)]))


def check_num_nodes_and_spawn_tasks():
    node_ids.update(ray.get([get_node_id.remote() for _ in range(10)]))
    return len(node_ids) >= NUM_NODES


wait_for_condition(check_num_nodes_and_spawn_tasks)
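
# Note: in the release test pipeline this script is submitted automatically,
# but a minimal manual submission via the Ray Jobs API would look roughly
# like the sketch below (the dashboard address and runtime_env are
# placeholders, not taken from the test configuration):
#
#   from ray.job_submission import JobSubmissionClient
#
#   client = JobSubmissionClient("http://127.0.0.1:8265")
#   client.submit_job(
#       entrypoint="python jobs_check_cuda_available.py",
#       runtime_env={"pip": ["torch"]},
#   )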