# openoker/GeneFacePlusPlus

# dataset-related
# Data directories, given as relative paths.
raw_data_dir: data/raw/videos
processed_data_dir: data/processed/videos
binary_data_dir: data/binary/videos
# Both left empty in this file; presumably filled in by a derived config or a
# command-line override (TODO confirm against the config loader).
video_id: ''
task_cls: ''
# Presumably module names to leave out when saving checkpoints (TODO confirm).
not_save_modules:
- criterion_lpips

# project-related
work_dir: ''
load_ckpt: ''
tb_log_interval: 100
num_ckpt_keep: 1
val_check_interval: 2000
valid_infer_interval: 10000
num_sanity_val_steps: 2
num_valid_plots: 5
eval_max_batches: 100 # num_test_plots
print_nan_grads: false
resume_from_checkpoint: 0 # specify the step, 0 for latest
# `amp` is deliberately not set in this section: the "# training" section of
# this file sets `amp: true`, and a YAML mapping must not contain the same key
# twice. Loaders that tolerate duplicates keep the last value, so the
# `amp: false` that used to sit here never took effect.
valid_monitor_key: val_loss
valid_monitor_mode: min
save_best: true
debug: false
save_codes:
- tasks
- modules
- egs

# testing related
# Presumably also writes the ground-truth outputs next to the predictions at
# test time (TODO confirm against the task code).
save_gt: true

# training-scheme-related
seed: 9999
lr: 0.0005
scheduler: exponential # one of: exponential|rsqrt|warmup|none|step_lr
warmup_updates: 0
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.999
weight_decay: 0
clip_grad_norm: 0 # 0 disables gradient clipping by norm
clip_grad_value: 0 # 0 disables gradient clipping by value
accumulate_grad_batches: 1

# model-related
# Empty in this file; the names in the inline comment are presumably the
# supported condition types (TODO confirm).
cond_type: '' # deepspeech, esperanto, idexp_lm3d

# training
amp: true # use fp16
# `load_imgs_to_memory` is deliberately not set in this section: this file sets
# it once, on its last line (`load_imgs_to_memory: false`), and a YAML mapping
# must not contain the same key twice. Loaders that tolerate duplicates keep
# the last value, so the `true` that used to sit here never took effect.

# NeRF-related
# Presumably the near/far sampling bounds along each ray (TODO confirm).
near: 0.3
far: 0.9
n_rays: 65536 # number of rays sampled per image at each training step; 65536 = 256*256
cuda_ray: true # use CUDA raymarching instead of pytorch
max_steps: 16 # max number of steps sampled per ray (only used when cuda_ray is true)
num_steps: 16 # number of steps sampled per ray (only used when cuda_ray is false)
upsample_steps: 0 # number of steps up-sampled per ray (only used when cuda_ray is false)
update_extra_interval: 16 # iteration interval for updating extra status (only used when cuda_ray is true)
max_ray_batch: 4096 # batch size of rays at inference, to avoid OOM (only used when cuda_ray is false)


# Step counts are written as plain integers. The digit-grouping form
# (e.g. 25_0000) is an integer only under YAML 1.1; YAML 1.2 core-schema
# loaders read it as a string.
max_updates: 250000 # 400000 for training the whole head, 50000 for finetuning the mouth
finetune_lips: true
finetune_lips_start_iter: 200000
lambda_lpips_loss: 0.01 # weight of the auxiliary LPIPS loss used when finetuning the lips
lambda_weights_entropy: 0.0001
lambda_ambient: 0.1

min_near: 0.05 # minimum near distance for the camera
bound: 1 # the scene is assumed to lie in the box [-bound, bound]^3; values > 1 invoke adaptive ray marching
camera_scale: 4.0 # scales the camera location into the box [-bound, bound]^3
camera_offset: [0, 0, 0] # offset applied to the camera location
grid_size: 128
desired_resolution: 2048
log2_hashmap_size: 16
dt_gamma: 0.00390625 # 1/256; dt_gamma (>= 0) for adaptive ray marching. 0 disables it, > 0 accelerates rendering (usually at lower quality)
density_thresh: 10 # threshold for a density-grid cell to count as occupied (sigma)
density_thresh_torso: 0.01 # threshold for a density-grid cell to count as occupied (alpha)
torso_shrink: 0.8 # shrink bg coords to allow more flexibility in deform

smooth_lips: false

# Network
grid_type: tiledgrid # tiledgrid or hashgrid
grid_interpolation_type: linear # smoothstep or linear
with_att: true
use_window_cond: true
torso_head_aware: false # head-aware torso NeRF, meant to avoid head-torso separation artifacts
num_layers_sigma: 3
hidden_dim_sigma: 128 # radnerf's 64 is too small
geo_feat_dim: 128 # radnerf's 64 is too small
num_layers_color: 2
hidden_dim_color: 128 # radnerf's 64 is too small
cond_out_dim: 64
num_layers_ambient: 3
hidden_dim_ambient: 128 # radnerf's 64 is too small
ambient_coord_dim: 2
individual_embedding_num: 13000
individual_embedding_dim: 4
torso_individual_embedding_dim: 8

# infer
infer_cond_name: ''
infer_out_video_name: ''
infer_scale_factor: 1.0
infer_smo_std: 0.0
infer_audio_source_name: ''
infer_c2w_name: ''
infer_lm3d_clamp_std: 1.5
infer_lm3d_lle_percent: 0.25 # share of the LLE-fused feature used to compose the processed lm3d
infer_lm3d_smooth_sigma: 0.0 # sigma of the Gaussian kernel used to smooth the predicted lm3d
infer_bg_img_fname: '' # black, white, or an image filename
infer_smooth_camera_path: true
infer_smooth_camera_path_kernel_size: 7

# gui feat
gui_w: 512
gui_h: 512
gui_radius: 3.35
gui_fovy: 21.24
gui_max_spp: 1 # maximum samples per pixel for GUI rendering

# Not a GUI setting: per its own description this controls how training images are loaded.
load_imgs_to_memory: false # load uint8 training images into memory, which reduces I/O cost at the expense of higher memory usage