# Experiment definition: runs the `PG` trainable on the `CartPole-v1`
# environment. The top-level key is the experiment name.
cartpole-pg:
  env: CartPole-v1
  run: PG
  # Stopping criteria.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached (Ray Tune `stop` semantics) - confirm against the Tune version
  # in use.
  stop:
    sampler_results/episode_reward_mean: 150
    timesteps_total: 100000
  # Settings passed to the algorithm itself.
  config:
    # Works for both torch and tf.
    framework: torch
    # NOTE(review): 0 presumably means no separate rollout workers are
    # started - confirm against the RLlib version in use.
    num_workers: 0