# ARS was never tested/tuned on Hopper. Maybe change to Reacher-v4?
# ars-hopper-v4:
#     env: Hopper-v4
#     run: ARS
#     # Minimum reward and total ts (in given time_total_s) to pass this test.
#     pass_criteria:
#         sampler_results/episode_reward_mean: 100.0
#         timesteps_total: 2000000
#     stop:
#         time_total_s: 2000
#     config:
#         noise_stdev: 0.01
#         num_rollouts: 1
#         rollouts_used: 1
#         num_workers: 1
#         sgd_stepsize: 0.02
#         noise_size: 250000000
#         eval_prob: 0.2
#         offset: 0
#         observation_filter: NoFilter
#         report_length: 3
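#
# A minimal sketch of running the same experiment programmatically instead of
# through the release-test harness (assumption: a Ray/RLlib version that still
# ships ARS and the legacy `tune.run` API):
#
#     from ray import tune
#
#     tune.run(
#         "ARS",
#         config={
#             "env": "Hopper-v4",
#             "noise_stdev": 0.01,
#             "num_rollouts": 1,
#             "rollouts_used": 1,
#             "num_workers": 1,
#             "sgd_stepsize": 0.02,
#             "noise_size": 250000000,
#             "eval_prob": 0.2,
#             "offset": 0,
#             "observation_filter": "NoFilter",
#             "report_length": 3,
#         },
#         stop={"time_total_s": 2000},
#     )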

# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO
# isn't actually any more efficient on Atari, since the network size is
# relatively small and the env doesn't require a GPU.
# ddppo-breakoutnoframeskip-v5:
#     env: ALE/Breakout-v5
#     run: DDPPO
#     # Minimum reward and total ts (in given time_total_s) to pass this test.
#     pass_criteria:
#         sampler_results/episode_reward_mean: 50.0
#         timesteps_total: 10000000
#     stop:
#         time_total_s: 3600
#     config:
#         # DDPPO only supports PyTorch so far.
#         frameworks: [ "torch" ]
#         # Make analogous to old v4 + NoFrameskip.
#         env_config:
#             frameskip: 1
#             full_action_space: false
#             repeat_action_probability: 0.0
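#         # (A sketch, assuming gymnasium's ALE registration: the three
#         # overrides above reproduce the old NoFrameskip-v4 behavior, i.e.
#         # gym.make("ALE/Breakout-v5", frameskip=1, full_action_space=False,
#         #          repeat_action_probability=0.0)
#         # plays like gym.make("BreakoutNoFrameskip-v4").)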
#         # Worker config: 16 workers, each of which requires a (shared) GPU.
#         num_workers: 16
#         # Workers require GPUs, but share 1 GPU amongst 4 workers.
#         num_gpus_per_worker: 0.25
#         # Each worker samples 100 (rollout_fragment_length) * 5 (envs per
#         # worker) = 500 steps per optimization round. This is 8000 steps
#         # summed across the 16 workers.
#         rollout_fragment_length: 100
#         num_envs_per_worker: 5
#         # Each worker takes a minibatch of 50. There are 16 workers total,
#         # so the effective minibatch size is 800.
#         sgd_minibatch_size: 50
#         num_sgd_iter: 30
#         # Params from standard PPO Atari config:
#         lambda: 0.95
#         kl_coeff: 0.5
#         clip_rewards: true
#         clip_param: 0.1
#         vf_loss_coeff: 0.1
#         vf_clip_param: 10.0
#         entropy_coeff: 0.01
#         batch_mode: truncate_episodes
#         observation_filter: NoFilter
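#
# A sketch of launching a block above by hand with the RLlib CLI once it is
# uncommented (assumption: the release-test harness normally consumes this
# file; `rllib train -f` expects the plain tuned-example layout, so the
# harness-only `pass_criteria` and `frameworks` keys would need to be dropped
# first):
#
#     rllib train -f todo_tests_currently_not_covered.yaml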