# base.yaml

  # Base configuration. Sections below: dataset, project/logging, testing,
  # training scheme, model, training, NeRF, network, inference, GUI.
  # Every key is defined exactly once; duplicate keys are invalid YAML and
  # most loaders silently keep only the last occurrence.

  # dataset-related
  raw_data_dir: data/raw/videos
  processed_data_dir: data/processed/videos
  binary_data_dir: data/binary/videos
  video_id: ''
  task_cls: ''
  not_save_modules: ['criterion_lpips']

  # project-related
  work_dir: ''
  load_ckpt: ''
  tb_log_interval: 100
  num_ckpt_keep: 1
  val_check_interval: 2000
  valid_infer_interval: 10000
  num_sanity_val_steps: 2
  num_valid_plots: 5
  eval_max_batches: 100  # num_test_plots
  print_nan_grads: false
  resume_from_checkpoint: 0  # specify the step, 0 for latest
  valid_monitor_key: val_loss
  valid_monitor_mode: min
  save_best: true
  debug: false
  save_codes:
  - tasks
  - modules
  - egs

  # testing-related
  save_gt: true

  # training-scheme-related
  seed: 9999
  lr: 0.0005
  scheduler: exponential  # exponential|rsqrt|warmup|none|step_lr
  warmup_updates: 0
  optimizer_adam_beta1: 0.9
  optimizer_adam_beta2: 0.999
  weight_decay: 0
  clip_grad_norm: 0  # disable grad clipping
  clip_grad_value: 0  # disable grad clipping
  accumulate_grad_batches: 1

  # model-related
  cond_type: ''  # deepspeech, esperanto, idexp_lm3d

  # training
  # NOTE: `amp` was previously declared twice (false in the project section,
  # true here). The later value (true) was the one loaders kept, so it is the
  # single definition retained.
  amp: true  # use fp16
  # Load uint8 training images into memory, which reduces I/O cost at the
  # expense of higher memory usage.
  # NOTE: this key was previously declared twice (true here, false at the end
  # of the file). The later value (false) was the effective one and is kept;
  # set it to true to enable in-memory loading.
  load_imgs_to_memory: false

  # NeRF-related
  near: 0.3
  far: 0.9
  # num rays sampled per image for each training step, default 256*256
  n_rays: 65536
  cuda_ray: true  # use CUDA raymarching instead of pytorch
  # max num steps sampled per ray (only valid when using --cuda_ray)
  max_steps: 16
  # num steps sampled per ray (only valid when NOT using --cuda_ray)
  num_steps: 16
  # num steps up-sampled per ray (only valid when NOT using --cuda_ray)
  upsample_steps: 0
  # iter interval to update extra status (only valid when using --cuda_ray)
  update_extra_interval: 16
  # batch size of rays at inference to avoid OOM
  # (only valid when NOT using --cuda_ray)
  max_ray_batch: 4096
  # 400000 for training the whole head, 50000 for finetuning the mouth.
  # Written without underscore digit grouping so the value is an integer
  # under both YAML 1.1 and YAML 1.2 resolvers.
  max_updates: 250000
  finetune_lips: true
  finetune_lips_start_iter: 200000
  lambda_lpips_loss: 0.01  # auxiliary loss for finetune lips
  lambda_weights_entropy: 0.0001
  lambda_ambient: 0.1
  min_near: 0.05  # minimum near distance for camera
  # assume the scene is bounded in box[-bound, bound]^3;
  # if > 1, will invoke adaptive ray marching.
  bound: 1
  camera_scale: 4.0  # scale camera location into box[-bound, bound]^3
  camera_offset: [0, 0, 0]  # offset of camera location
  grid_size: 128
  desired_resolution: 2048
  log2_hashmap_size: 16
  # default 1/256; dt_gamma (>=0) for adaptive ray marching. Set to 0 to
  # disable, >0 to accelerate rendering (but usually with worse quality).
  dt_gamma: 0.00390625
  # threshold for density grid to be occupied (sigma)
  density_thresh: 10
  # threshold for density grid to be occupied (alpha)
  density_thresh_torso: 0.01
  torso_shrink: 0.8  # shrink bg coords to allow more flexibility in deform
  smooth_lips: false

  # Network
  grid_type: tiledgrid  # tiledgrid or hashgrid
  grid_interpolation_type: linear  # smoothstep or linear
  with_att: true
  use_window_cond: true
  # head-aware torso NeRF to avoid head-torso separation artifacts
  torso_head_aware: false
  num_layers_sigma: 3
  hidden_dim_sigma: 128  # 64 by radnerf is too small
  geo_feat_dim: 128  # 64 by radnerf is too small
  num_layers_color: 2
  hidden_dim_color: 128  # 64 by radnerf is too small
  cond_out_dim: 64
  num_layers_ambient: 3
  hidden_dim_ambient: 128  # 64 by radnerf is too small
  ambient_coord_dim: 2
  individual_embedding_num: 13000
  individual_embedding_dim: 4
  torso_individual_embedding_dim: 8

  # infer
  infer_cond_name: ''
  infer_out_video_name: ''
  infer_scale_factor: 1.0
  infer_smo_std: 0.0
  infer_audio_source_name: ''
  infer_c2w_name: ''
  infer_lm3d_clamp_std: 1.5
  # percent of lle fused feature to compose the processed lm3d
  infer_lm3d_lle_percent: 0.25
  # sigma of gaussian kernel to smooth the predicted lm3d
  infer_lm3d_smooth_sigma: 0.0
  infer_bg_img_fname: ''  # black, white, or an image file name
  infer_smooth_camera_path: true
  infer_smooth_camera_path_kernel_size: 7

  # gui feat
  gui_w: 512
  gui_h: 512
  gui_radius: 3.35
  gui_fovy: 21.24
  gui_max_spp: 1  # GUI rendering max sample per pixel