# openoker / ray
---
# Tuned-example experiment: runs MADDPG on the TwoStepGame example env.
two-step-game-maddpg:
    env: ray.rllib.examples.env.two_step_game.TwoStepGame
    run: MADDPG
    # Stopping criteria for the experiment.
    stop:
        sampler_results/episode_reward_mean: 7.2
        timesteps_total: 20000
    config:
        # MADDPG only supports tf for now.
        framework: tf

        # NOTE(review): `env_config` is nested twice here. Confirm that the
        # env actually reads `actions_are_logits` from the inner mapping.
        env_config:
            env_config:
                actions_are_logits: true

        num_steps_sampled_before_learning_starts: 200

        multiagent:
            # Each policy is a 4-item list: the first three entries are left
            # null and the last one is a per-policy config mapping that sets
            # `agent_id`.
            # Presumably [policy_class, obs_space, action_space, config];
            # confirm against RLlib's multi-agent documentation.
            policies:
                p0:
                    - null
                    - null
                    - null
                    - agent_id: 0
                p1:
                    - null
                    - null
                    - null
                    - agent_id: 1
            # YAML-capable policy_mapping_fn definition: a callable class is
            # provided here via its import path.
            policy_mapping_fn:
                type: ray.rllib.examples.multi_agent_and_self_play.policy_mapping_fn.PolicyMappingFn