guided_ldm_inpaint4_v15.yaml

model:
  target: manga_translator.inpainting.guided_ldm_inpainting.GuidedLDM
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False

    unet_config:
      target: manga_translator.inpainting.ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False

    first_stage_config:
      target: manga_translator.inpainting.ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: manga_translator.inpainting.ldm.modules.encoders.modules.FrozenCLIPEmbedder
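For reference, here is a minimal sketch of how a target/params config like this is typically turned into a live model in ldm-style codebases: the YAML is read with OmegaConf, the dotted target path is imported, and the class is constructed with its params. The instantiation helper is written out inline rather than relying on the fork's own utilities, and the checkpoint filename is a placeholder assumption, not something defined by this config.

import importlib

import torch
from omegaconf import OmegaConf


def instantiate_from_config(cfg):
    # Resolve the dotted "target" path and construct it with "params".
    module_path, cls_name = cfg["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_path), cls_name)
    return cls(**cfg.get("params", {}))


if __name__ == "__main__":
    config = OmegaConf.load("guided_ldm_inpaint4_v15.yaml")
    model = instantiate_from_config(config.model)

    # Hypothetical checkpoint path; weights are assumed to follow the usual
    # ldm convention of storing parameters under a "state_dict" key.
    state = torch.load("inpainting.ckpt", map_location="cpu")
    model.load_state_dict(state.get("state_dict", state), strict=False)
    model.eval()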