{
  "architectures": [
    "MptForCausalLM"
  ],
  "attn_config": {
    "alibi": true,
    "alibi_bias_max": 8,
    "attn_impl": "torch",
    "attn_pdrop": 0,
    "attn_type": "multihead_attention",
    "attn_uses_sequence_id": false,
    "clip_qkv": null,
    "prefix_lm": false,
    "qk_ln": false,
    "softmax_scale": null
  },
  "d_model": 2048,
  "emb_pdrop": 0,
  "embedding_fraction": 1.0,
  "expansion_ratio": 4,
  "init_config": {
    "emb_init_std": null,
    "emb_init_uniform_lim": null,
    "fan_mode": "fan_in",
    "init_div_is_residual": true,
    "init_gain": 0,
    "init_nonlinearity": "relu",
    "init_std": 0.02,
    "name": "kaiming_normal_",
    "verbose": 0
  },
  "init_device": "cpu",
  "learned_pos_emb": true,
  "logit_scale": null,
  "max_seq_len": 2048,
  "model_type": "mpt",
  "n_heads": 16,
  "n_layers": 1,
  "no_bias": true,
  "norm_type": "low_precision_layernorm",
  "resid_pdrop": 0,
  "tokenizer_name": "EleutherAI/gpt-neox-20b",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.28.1",
  "use_cache": false,
  "verbose": 0,
  "vocab_size": 50432
}