author     cebtenzzre <cebtenzzre@gmail.com>          2023-10-15 02:32:06 -0400
committer  GitHub <noreply@github.com>                2023-10-15 09:32:06 +0300
commit     11bff290458f12f020b588792707f76ec658a27a (patch)
tree       1f37e1f536551b0d6d760530ea0896e67d78cc6e /llama.cpp
parent     11dc1091f64b24ca6d643acc6d0051117ba60161 (diff)
MPT : support GQA for replit-code-v1.5 (#3627)
Diffstat (limited to 'llama.cpp')
-rw-r--r--   llama.cpp   6
1 file changed, 3 insertions, 3 deletions
@@ -2839,8 +2839,8 @@ static void llm_load_tensors(
                     auto & layer = model.layers[i];

                     layer.attn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, backend);

-                    layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, 3*n_embd}, backend_split);
-                    layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
+                    layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
+                    layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);

                     layer.ffn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
@@ -5368,7 +5368,7 @@ static struct ggml_cgraph * llm_build_mpt(
     const int64_t n_layer     = hparams.n_layer;
     const int64_t n_ctx       = cparams.n_ctx;
     const int64_t n_head      = hparams.n_head;
-    const int64_t n_head_kv   = hparams.n_head_kv; // == n_head for MPT, as there's no MQA/GQA
+    const int64_t n_head_kv   = hparams.n_head_kv;
     const int64_t n_embd_head = hparams.n_embd_head();
     const int64_t n_embd_gqa  = hparams.n_embd_gqa();
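For context, the wqkv shape change follows the GQA arithmetic: with grouped-query attention the K and V projections use n_head_kv heads instead of n_head, so each is n_embd_gqa = n_embd_head * n_head_kv wide, and the fused QKV projection outputs n_embd + 2*n_embd_gqa columns rather than 3*n_embd. A minimal standalone sketch of that arithmetic follows; it is not part of this commit, and all hyperparameter values in it are hypothetical, chosen only for illustration.

// Sketch of the GQA fused-QKV width calculation (illustrative values, not replit-code-v1.5's).
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_embd      = 4096;                     // model (query) width
    const int64_t n_head      = 32;                       // number of query heads
    const int64_t n_head_kv   = 8;                        // number of K/V heads (< n_head under GQA)
    const int64_t n_embd_head = n_embd / n_head;          // width of one attention head
    const int64_t n_embd_gqa  = n_embd_head * n_head_kv;  // width of K (and of V)

    // Fused QKV projection: a full-width Q plus one K and one V of width n_embd_gqa.
    const int64_t wqkv_cols = n_embd + 2*n_embd_gqa;      // 4096 + 2*1024 = 6144

    // With plain multi-head attention (n_head_kv == n_head) this reduces to the old 3*n_embd.
    std::printf("wqkv tensor shape: {%lld, %lld}\n", (long long) n_embd, (long long) wqkv_cols);
    return 0;
}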