configuration_gptpangu.py

from transformers.configuration_utils import PretrainedConfig


class GPTPanguConfig(PretrainedConfig):
    """Configuration class for GPT-Pangu models.

    Stores the hyperparameters needed to instantiate the model (vocabulary
    size, hidden size, number of layers/heads, dropout rates, etc.) and
    inherits serialization/loading behaviour from `PretrainedConfig`.
    """

    model_type = "gpt_pangu"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=40000,
        max_position_embeddings=1024,
        hidden_size=1024,
        intermediate_size=None,
        num_layers=24,
        num_heads=16,
        activation_function="gelu",
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        scale_attn_weights=True,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        use_cache=True,
        # bos_token_id=9,
        # eos_token_id=9,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.activation_function = activation_function
        self.resid_pdrop = resid_pdrop
        self.embd_pdrop = embd_pdrop
        self.attn_pdrop = attn_pdrop
        self.layer_norm_epsilon = layer_norm_epsilon
        self.scale_attn_weights = scale_attn_weights
        self.initializer_range = initializer_range
        self.summary_type = summary_type
        self.summary_use_proj = summary_use_proj
        self.summary_activation = summary_activation
        self.summary_first_dropout = summary_first_dropout
        self.summary_proj_to_labels = summary_proj_to_labels
        self.use_cache = use_cache
        # self.bos_token_id = bos_token_id
        # self.eos_token_id = eos_token_id

        super().__init__(**kwargs)
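
For context, a minimal usage sketch. It assumes the file above is importable as a module named configuration_gptpangu on the Python path, and the "./pangu-config" directory is an arbitrary example path; only the save_pretrained/from_pretrained behaviour inherited from PretrainedConfig is relied on here.

from configuration_gptpangu import GPTPanguConfig

# Instantiate with the defaults defined above, overriding a few fields.
config = GPTPanguConfig(hidden_size=2560, num_layers=32, num_heads=32)

# PretrainedConfig provides JSON serialization and reloading out of the box.
config.save_pretrained("./pangu-config")              # writes config.json
reloaded = GPTPanguConfig.from_pretrained("./pangu-config")
assert reloaded.hidden_size == 2560
assert reloaded.model_type == "gpt_pangu"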