---
# Experiment definition: one top-level key (the experiment name) mapping to
# the environment, the algorithm to run, stop criteria, and algorithm config.
cartpole-es:
  env: CartPole-v0
  # Algorithm identifier (presumably Evolution Strategies; confirm against
  # the consuming framework's algorithm registry).
  run: ES
  # Stop thresholds. NOTE(review): whether the run ends when ANY or ALL of
  # these are reached is decided by the consumer; confirm before relying on it.
  stop:
    episode_reward_mean: 100
    timesteps_total: 500000
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 2
    # NOTE(review): presumably the size of a shared noise table; confirm
    # against the ES algorithm documentation.
    noise_size: 25000000
    episodes_per_batch: 50