__init__.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # Counters for sampling and training steps (env- and agent steps).
  2. NUM_ENV_STEPS_SAMPLED = "num_env_steps_sampled"
  3. NUM_AGENT_STEPS_SAMPLED = "num_agent_steps_sampled"
  4. NUM_ENV_STEPS_SAMPLED_THIS_ITER = "num_env_steps_sampled_this_iter"
  5. NUM_AGENT_STEPS_SAMPLED_THIS_ITER = "num_agent_steps_sampled_this_iter"
  6. NUM_ENV_STEPS_TRAINED = "num_env_steps_trained"
  7. NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"
  8. NUM_ENV_STEPS_TRAINED_THIS_ITER = "num_env_steps_trained_this_iter"
  9. NUM_AGENT_STEPS_TRAINED_THIS_ITER = "num_agent_steps_trained_this_iter"
  10. # Counters for keeping track of worker weight updates (synchronization
  11. # between local worker and remote workers).
  12. NUM_SYNCH_WORKER_WEIGHTS = "num_weight_broadcasts"
  13. NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS = (
  14. "num_training_step_calls_since_last_synch_worker_weights"
  15. )
  16. # Number of total gradient updates that have been performed on a policy.
  17. NUM_GRAD_UPDATES_LIFETIME = "num_grad_updates_lifetime"
  18. # Average difference between the number of grad-updates that the policy/ies had
  19. # that collected the training batch vs the policy that was just updated (trained).
  20. # Good measuere for the off-policy'ness of training. Should be 0.0 for PPO and PG,
  21. # small for IMPALA and APPO, and any (larger) value for DQN and other off-policy algos.
  22. DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICY = "diff_num_grad_updates_vs_sampler_policy"
  23. # Counters to track target network updates.
  24. LAST_TARGET_UPDATE_TS = "last_target_update_ts"
  25. NUM_TARGET_UPDATES = "num_target_updates"
  26. # Performance timers (keys for Algorithm._timers).
  27. TRAINING_ITERATION_TIMER = "training_iteration"
  28. APPLY_GRADS_TIMER = "apply_grad"
  29. COMPUTE_GRADS_TIMER = "compute_grads"
  30. GARBAGE_COLLECTION_TIMER = "garbage_collection"
  31. SYNCH_WORKER_WEIGHTS_TIMER = "synch_weights"
  32. GRAD_WAIT_TIMER = "grad_wait"
  33. SAMPLE_TIMER = "sample"
  34. LEARN_ON_BATCH_TIMER = "learn"
  35. LOAD_BATCH_TIMER = "load"
  36. TARGET_NET_UPDATE_TIMER = "target_net_update"
  37. # learner
  38. LEARNER_STATS_KEY = "learner_stats"
  39. ALL_MODULES = "__all__"